#--------------------------------------------------------------------------------------
#
# ERpath_v17.R - code to model the ER pathway
#
# December 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
library(grDevices)
library(RColorBrewer)
library(stringr)
library(pca3d)
library(openxlsx)
source("utils.R")

#===============================================================================
# Pathway-specific - start
#===============================================================================
PATHWAY <<- "ER"
NASSAY <<- 18
NRECEPTOR <<- 9
NRECEPTOR0 <<- 9
NRECEPTOR <<- 26
#AUCSCALE1 <<-  2.3  # put the top of the most active chemical to 1.0 
#AUCSCALE2 <<-  1.87 # makes the top AUC==1.0
SPECIFIC.AUC.CUTOFF <<- 0.1

#PENALTY.METHOD <<- "RIDGE"
#PENALTY.METHOD <<- "LASSO"
PENALTY.METHOD <<- "THRESHOLD"

ALPHA0 <<- 1

#===============================================================================
# Pathway-specific - stop
#===============================================================================
HEATMAP.CMETHOD <<- "ward.D"
NCONC <- 45
CONCLIST <<- c(1e-6,2.679636e-06,4.019455e-06,6.029182e-06,9.043773e-06,1.356566e-05,2.034849e-05,3.052273e-05,4.57841e-05,6.867615e-05,0.0001030142,0.0001545213,0.000231782,0.000347673,0.0005215095,0.0007822643,0.001173396,0.001760095,0.002640142,0.003960213,0.005940319,0.008910479,0.01336572,0.02004858,0.03007287,0.0451093,0.06766395,0.1014959,0.1522439,0.2283658,0.3425487,0.5138231,0.7707347,1.156102,1.734153,2.601229,3.901844,5.852766,8.77915,13.16872,19.75309,29.62963,44.44444,66.66667,100)
#--------------------------------------------------------------------------------------
#
# run all
#
#--------------------------------------------------------------------------------------
run.all <- function(do.prep=F,do.ref=F,do.all=F,do.EAS=F,do.summary=F) {
    if(do.prep) {
    	load.data(pathway=PATHWAY) 
    	apply.filters()
    	Tmat.va()
        LS.legend(to.file=T)
        AC50HM(to.file=T)
        prep.literature()
    }
    if(do.ref) {
        prepCRref()

        ALPHA <<- 0.01
		AUCSCALE1 <<-  2.2  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.32 # makes the top AUC==1.0
        LSref(to.file=T)
        RHM(mode="ref",to.file=T)
        AUCcalc(mode="ref")
        AUCHM(mode="ref",to.file=T,dcut=10)
        refchem.dist(to.file=T)
        refchem.laneplot(to.file=T)
        refchem.laneplot.log(to.file=T)

        ALPHA <<- 1
		AUCSCALE1 <<-  3.8  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.24 # makes the top AUC==1.0
        LSref(to.file=T)
        RHM(mode="ref",to.file=T)
        AUCcalc(mode="ref")
        AUCHM(mode="ref",to.file=T,dcut=10)
        refchem.dist(to.file=T)
        refchem.laneplot(to.file=T)
        refchem.laneplot.log(to.file=T)

        ALPHA <<- 100
		AUCSCALE1 <<-  6.2  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.20 # makes the top AUC==1.0
        LSref(to.file=T)
        RHM(mode="ref",to.file=T)
        AUCcalc(mode="ref")
        AUCHM(mode="ref",to.file=T,dcut=10)
        refchem.dist(to.file=T)
        refchem.laneplot(to.file=T)
        refchem.laneplot.log(to.file=T)
    }
    if(do.all) {
        prepCRall()
        
        ALPHA <<- 0.01
		AUCSCALE1 <<-  2.2  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.32 # makes the top AUC==1.0
        LSall(to.file=T)
        AUCcalc(mode="all")
        AUCHM(mode="all",to.file=T,dcut=10)
        
        ALPHA <<- 1
		AUCSCALE1 <<-  3.8  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.24 # makes the top AUC==1.0
        LSall(to.file=T)
        AUCcalc(mode="all")
        AUCHM(mode="all",to.file=T,dcut=10)
        
        ALPHA <<- 100
		AUCSCALE1 <<-  6.2  # put the top of the most active chemical to 1.0 
		AUCSCALE2 <<-  1.20 # makes the top AUC==1.0
        LSall(to.file=T)
        AUCcalc(mode="all")
        AUCHM(mode="all",to.file=T,dcut=10)
    }
    if(do.EAS) {
    	EAScalc("FP","R1")
    	EAScalc("FP","R2")
    }
    if(do.summary) {
    	prep.supermatrix()
    	filename <- "../output/superMatrix.csv"
		temp <- read.csv(file=filename,stringsAsFactors=F)
		rownames(temp) <- temp[,"CODE"]
		SUPERMATRIX <<- temp
    	allstats()
    	dx.specificity.zt()
		dx.structure.specificity.zt(super=F)
		dx.pains.ztfiltered()
		dx.atg.crosstalk()
		dx.nvs.crosstalk()
		dx.tox21.crosstalk()
		dx.flag.filter()
    	dx.physchem.specificity.ztfiltered()
    	
    	comp.lit(nmin=4)
    	receptor.tree(SPECIFIC.AUC.CUTOFF)
    	pseudoAC50.AUC(T,SPECIFIC.AUC.CUTOFF)
    	summaryTable(to.file=T)
        if(PATHWAY=="ER") {
        	comp.old.new(to.file=T)
        	AUC.vs.rotroff(to.file=T,mode="ref")
        	AUC.vs.rotroff(to.file=T,mode="all")
        }
    }
}
#--------------------------------------------------------------------------------------
#
# load data required for the calculations
#
#--------------------------------------------------------------------------------------
load.data <- function(pathway="ER") {
    cat("==========================================================================\n")
    cat("load data\n")
    cat("==========================================================================\n")

    file <- paste("../input/Pathway_",pathway,"_Chemicals.txt",sep="")
    name.temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"",comment="")
    rownames(name.temp) <- name.temp[,"CODE"]
    CHEMS <<- name.temp
    CODE.LIST <<- name.temp[,"CODE"]
    NCHEM <<- length(CODE.LIST)

    file <- "../input/ToxCast_GenericChemicals_2014_11_24.csv"
	temp <- read.csv(file=file,stringsAsFactors=F)
	rownames(temp) <- temp[,"CODE"]
	temp <- temp[CODE.LIST,]
	chems <- CHEMS
	chems <- cbind(chems,temp[,"use_super_category"])
	names(chems)[dim(chems)[2]] <- "use_super_category"
	CHEMS <<- chems
    cat("loaded Chemical Information\n"); flush.console()
    
    file <- paste("../input/Pathway_",pathway,"_AC50.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp2 <- temp[,5:dim(temp)[2]]
    MAT.AC50 <<- temp2
    ASSAY.LIST <<- names(MAT.AC50)

    temp2 <- -log10(temp2/1000000)
    MAT.AC50.LOG <<- temp2
    temp2[temp2>0] <- 1
    MAT.AC50.DISC <<- temp2
    cat("dim(MAT.AC50):",dim(MAT.AC50),"\n")

    file <- paste("../input/Pathway_",pathway,"_Z.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"",comment="")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    ZSCORE <<- temp

	file <- paste("../input/Pathway_",pathway,"_Cytotox.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"",comment="")
    rownames(temp) <- temp[,"CODE"]
    CYTOTOX <<- temp

    file <- paste("../input/Pathway_",pathway,"_T.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.T <<- temp

    file <- paste("../input/Pathway_",pathway,"_W.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.W <<- temp

    file <- paste("../input/Pathway_",pathway,"_Emax.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.EMAX <<- temp

    file <- paste("../input/Pathway_",pathway,"_max_conc.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.MAXCONC <<- 10**temp

    file <- paste("../input/Pathway_",pathway,"_AC10.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.AC10 <<- temp

    file <- paste("../input/Pathway_",pathway,"_ACB.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.ACB <<- temp

    file <- paste("../input/Pathway_",pathway,"_ACC.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    temp <- temp[,5:dim(temp)[2]]
    MAT.ACC <<- temp

    cat("loaded ASSAY matrix: ",dim(MAT.AC50),"\n")
    temp0 <- MAT.AC50.LOG[,2]
    mask0 <- temp0
    mask0[temp0>0] <- 1
    meanvals <- vector(mode="numeric",length=NASSAY)
    meanvals[] <- 0
    for(i in 1:NASSAY) {
        temp1 <- MAT.AC50.LOG[,i]
        mask1 <- temp1
        mask1[mask1>0] <- 1
        mask <- mask0*mask1
        temp1 <- temp1[mask==1]
        meanvals[i] <- mean(temp1)
    }

    file <- paste("../input/refchems/",pathway,"_refchems.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    refnames <- temp[is.element(temp[,1],row.names(MAT.AC50)),1]
    refchems <- temp[is.element(temp[,1],refnames),]
    REFCHEMS <<- refchems

    cat("Unique chemicals: ",NCHEM,"\n")

    file <- "../input/tox21_qc.txt"
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="")
    TOX21.QC <<- temp

    file <- paste("../input/Pathway_",pathway,"_Flags.txt",sep="")
    temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
    names(temp)[5] <- "CODE"
    names(temp)[4] <- "Name"
    names(temp)[7] <- "Assay"
    CAUTION.FLAGS <<- temp

    file <- "../input/EDSP_universe_input_2014_04_17.txt"
    EDSP.UNIVERSE <<- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    
    filename <- "../input/AllResults_tested_Matrix_141017.csv"
	tested <- read.csv(file=filename,stringsAsFactors=F,row.names=1)
    filename <- "../input/AllResults_hitc_Matrix_141017.csv"
	hitc <- read.csv(file=filename,stringsAsFactors=F,row.names=1)
    filename <- "../input/AllResults_zscore_Matrix_141017.csv"
	zmat <- read.csv(file=filename,stringsAsFactors=F,row.names=1)
	tested <- tested[CODE.LIST,]
	hitc <- hitc[CODE.LIST,]
	zmat <- zmat[CODE.LIST,]
	
	TOXCAST.TESTED <<- tested
	TOXCAST.HITC <<- hitc
	TOXCAST.ZMAT <<- zmat
	
	talist <- names(zmat)
	talist <- talist[!is.element(talist,ASSAY.LIST)]
	tested.noer <- tested[,talist]
	hitc.noer <- hitc[,talist]
	zmat.noer <- zmat[,talist]
	
	NTESTED <<- rowSums(tested)

	temp <- hitc
	temp[is.na(temp)] <- 0
	hit.temp <- temp
	HITS <<- rowSums(temp)    

	temp <- zmat
	temp[is.na(temp)] <- -100
	temp[hit.temp==0] <- -100
	temp[temp<3] <- 0
	temp[temp>3] <- 1
	HITS.Z.HI <<- rowSums(temp)   

	temp <- zmat
	temp[is.na(temp)] <- 100
	temp[hit.temp==0] <- 100
	temp[temp<=0] <- 0.1
	temp[temp>=3] <- 0
	temp[temp>0] <- 1
	HITS.Z.LO <<- rowSums(temp)   
}
#--------------------------------------------------------------------------------------
#
# apply the QC flags and filters
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
apply.filters <- function() {
    cat("==========================================================================\n")
    cat("apply filters\n")
    cat("==========================================================================\n")

    cat("ACEA 50% filter\n")
    assay <- "ACEA_T47D_80hr_Positive"
    mask <- MAT.AC50.DISC[,assay]
    emax <- MAT.EMAX[,assay]
    top <- MAT.T[,assay]
    ac50 <- MAT.AC50[,assay]
    mask[emax<50] <- 0
    MAT.AC50.DISC[,assay] <- mask
    top[mask==0] <- 0
    MAT.T[,assay] <- top
    ac50[mask==0] <- 1000000
    MAT.AC50[,assay] <- ac50

    cat("Novascreen 50% x low AC50 filter\n")
    assay.list <- c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa")
    for(i in 1:length(assay.list)) {
        assay <- assay.list[i]
        mask <- MAT.AC50.DISC[,assay]
        emax <- MAT.EMAX[,assay]
        top <- MAT.T[,assay]
        ac50 <- MAT.AC50[,assay]

        mask.emax <- mask
        mask.emax[] <- 0
        mask.ac50 <- mask
        mask.ac50[] <- 0
        mask.top <- mask
        mask.top[] <- 0
        mask.emax[emax<50] <- 1
        mask.ac50[ac50<10] <- 1
        mask.top[top<50] <- 1
        mask.combine <- mask.emax * mask.ac50
        mask.combine <- mask.top
        mask[mask.combine==1] <- 0

        MAT.AC50.DISC[,assay] <- mask
        top[mask==0] <- 0
        MAT.T[,assay] <- top
        ac50[mask==0] <- 1000000
        MAT.AC50[,assay] <- ac50
    }

    MAT.AC50 <<- MAT.AC50
    MAT.T <<- MAT.T
    MAT.AC50.DISC <<- MAT.AC50.DISC
    temp <- -log10(MAT.AC50/1000000)
    MAT.AC50.LOG <<- temp
}
#--------------------------------------------------------------------------------------
#
# prep the T matrix with a variable number of assays
#
#===============================================================================
# Pathway-specific - update pathway-specific TMAT
#
# TMAT is a matrix whose elements mathc the connectivity in the pathway drawing
#===============================================================================
#--------------------------------------------------------------------------------------
Tmat.va <- function() {
    temp <- matrix(nrow=NASSAY,ncol=NRECEPTOR)
    temp[] <- 0
    temp[,1] <-  c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0)
    temp[,2] <-  c(1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1)
    temp[,3] <-  c(1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,4] <-  c(0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0)
    temp[,5] <-  c(0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0)
    temp[,6] <-  c(0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0)
    temp[,7] <-  c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0)
    temp[,8] <-  c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0)
    temp[,9] <-  c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)

	# NVS
    temp[,10] <- c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,11] <- c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,12] <- c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)

	# OT PC
    temp[,13] <- c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,14] <- c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,15] <- c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
    temp[,16] <- c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0)
    temp[,17] <- c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
    temp[,18] <- c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0)

    # OT RE
    temp[,19] <- c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0)
    temp[,20] <- c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0)

    # ATG
    temp[,21] <- c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0)
    temp[,22] <- c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0)

    # NCGC Agonist
    temp[,23] <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0)
    temp[,24] <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0)

    # NCGC Antagonist
    temp[,25] <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0)
    temp[,26] <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)

    TMAT <<- temp
}
#--------------------------------------------------------------------------------------
#
# Calculate the least squares solution for one chemical
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
LS.legend <- function(to.file=F) {
    if(to.file) {
        fname <- "../plots/model_legend.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }

    ytop <- 2.8
    plot(1~1,type="n",col.axis="white",tcl=0.01,cex.axis=0.1,cex.lab=0.1,xlim=c(0,5),ylim=c(0,ytop),xlab="",ylab="",lwd=3,main="")
    text(x=1,y=ytop,labels="Assay Legend",pos=4,cex=1.2)
    dy <- 0.085
    ytop <- ytop - 0.05

    anames <- vector(length=NASSAY,mode="character")
    anames[1] <- "A1: bovine ER cell-free radioligand binding (NVS)"
    anames[2] <- "A2: human ER cell-free radioligand binding (NVS)"
    anames[3] <- "A3: mouse ERa cell-free radioligand binding (NVS)"
    anames[4] <- "A4: ERa-ERa protein complementation/FRET 8 hr (OT)"
    anames[5] <- "A5: ERa-ERa protein complementation/FRET 24 hr (OT)"
    anames[6] <- "A6: ERa-ERb protein complementation/FRET 8 hr (OT)"
    anames[7] <- "A7: ERa-ERb protein complementation/FRET 24 hr (OT)"
    anames[8] <- "A8: ERb-ERb protein complementation/FRET 8 hr (OT)"
    anames[9] <- "A9: ERb-ERb protein complementation/FRET 24 hr (OT)"
    anames[10] <- "A10: ERE chromatin binding (PCA/FRET) 8 hr (OT)"
    anames[11] <- "A11: ERE chromatin binding (PCA/FRET) 24 hr (OT)"
    anames[12] <- "A12: ERa-TRANS reporter gene (ATG)"
    anames[13] <- "A13: ERE-CIS reporter gene (ATG)"
    anames[14] <- "A14: ERa beta-lactamase agonist reporter gene (Tox21)"
    anames[15] <- "A15: ERa luciferase-BG1 agonist reporter gene (Tox21)"
    anames[16] <- "A16: T47D real-time cell proliferation (ACEA)"
    anames[17] <- "A17: ERa beta-lactamase antagonist reporter gene (Tox21)"
    anames[18] <- "A18: ERa luciferase-BG1 antagonist reporter gene (Tox21)"

    cols <- c("black","black","black","green","green","green","green","green","green","gray","gray","hotpink","hotpink","cyan","cyan","yellow4","orange","orange")
    ltys <- c(1,2,3, 1,1,2,2,3,3, 1,2, 1,2, 1,2,1, 1,2, 1,2)
    for(i in 1:NASSAY) {
        lines(x=c(0.01,1),y=c(ytop-dy*i,ytop-dy*i),col=cols[i],lwd=3,lty=ltys[i])
        text(x=1,y=ytop-dy*i,labels=anames[i],pos=4,cex=0.9)
    }

    ltys <- c( 1,     1,     1,       1,      1,     2,        1,     1,       1,       2,       1,       2)
    cols <- c("blue","red", "black", "green", "gray","hotpink","cyan","yellow4","orange")
    rnames <- vector(length=NRECEPTOR,mode="character")
    rnames[1] <- "R1: Agonist Model"
    rnames[2] <- "R2: Antagonist Model"
    rnames[3] <- "R3: Interference: cell-free radioligand binding (NVS)"
    rnames[4] <- "R4: Interference: protein complementation (PCA)/FRET (OT)"
    rnames[5] <- "R5: Interference: chromatin binding PCA/FRET (OT)"
    rnames[6] <- "R6: Interference: RNA reporter gene agonist (ATG)"
    rnames[7] <- "R7: Interference: protein reporter gene agonist (Tox21)"
    rnames[8] <- "R8: Interference: cell proliferation (ACEA)"
    rnames[9] <- "R9: Interference: protein reporter antagonist (Tox21)"
    ytop <- 0.85
    text(x=1,y=ytop,labels="Receptor Legend",pos=4,cex=1.2)
    ytop <- ytop-0.05
    for(i in 1:NRECEPTOR0) {
        lines(x=c(0.01,1),y=c(ytop-dy*i,ytop-dy*i),col=cols[i],lwd=3,lty=ltys[i])
        text(x=1,y=ytop-dy*i,rnames[i],pos=4,cex=0.9)
    }
    if(to.file) dev.off()
}
#--------------------------------------------------------------------------------------
#
# do the hierarchical clustering on the AUC matrix
#
#--------------------------------------------------------------------------------------
AC50HM <- function(to.file=F) {
    if(to.file) {
        fname <- "../plots/chem_AC50_heatmap.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    cmethod <- HEATMAP.CMETHOD
    dmat <- MAT.AC50
    dmat <- -log10(dmat/1000000)
    rs <- rowSums(dmat)

    dmat <- dmat[rs>0,]
    nchem <- dim(dmat)[1]
    namelist <- c()
    for(i in 1:NASSAY) namelist <- c(namelist,paste("A",i,sep=""))
    names(dmat) <- namelist
    cat("getting ready to run heatmap\n")
    main <- paste("AC50 Heatmap:",nchem,"chemicals")
    heatmap(as.matrix(dmat),margins=c(5,5),scale="none",labRow="",
            xlab="",ylab="",cexCol=1,cexRow=0.1,col=brewer.pal(9,"Reds"),
            hclustfun=function(x) hclust(d=dist(x),method=cmethod),
            main=main)
    if(to.file) dev.off()
}
#--------------------------------------------------------------------------------------
#
# prep the concentration-response matrix for the reference chemicals
#
#--------------------------------------------------------------------------------------
prepCRref <- function() {
    dir <- "../input/CRref"
    CHEMS <- REFCHEMS[,1]
    for(i in 1:length(CHEMS)) {
        code <- CHEMS[i]
        prepCR(code,dir)
    }
}
#--------------------------------------------------------------------------------------
#
# prep the concentration-response matrix for all chemicals
#
#--------------------------------------------------------------------------------------
prepCRall <- function() {
    dir <- "../input/CRall"
    CHEMS <- CHEMS[,"CODE"]
    for(i in 1:length(CHEMS)) {
        code <- CHEMS[i]
        if(!is.na(code)) prepCR(code,dir)
    }
}
#--------------------------------------------------------------------------------------
#
# prep the concentration-response matrix for a single chemical
#
#--------------------------------------------------------------------------------------
prepCR <- function(code="C85687",dir="../input/CRref",do.debug=F) {
    cat(code,"\n")
    flush.console()

    ac50 <- vector(length=NASSAY,mode="numeric")
    top <- vector(length=NASSAY,mode="numeric")
    w <- vector(length=NASSAY,mode="numeric")
    z <- vector(length=NASSAY,mode="numeric")
    emax <- vector(length=NASSAY,mode="numeric")
    maxconc <- vector(length=NASSAY,mode="numeric")

    for(i in 1:NASSAY) {
    	assay <- names(MAT.AC50)[i]
	    ac50[i] <- MAT.AC50[code,assay]
	    top[i] <-  MAT.T[code,assay]
	    w[i] <-  MAT.W[code,assay]
	    emax[i] <-  MAT.EMAX[code,assay]
	    maxconc[i] <-  MAT.MAXCONC[code,assay]
	    z[i] <-  ZSCORE[code,assay]
    }
    top[is.na(top)] <- 0
    emax[is.na(emax)] <- 0
    w[is.na(w)] <- 1
    if(do.debug) {
    	print(ac50)
    	print(top)
    	print(w)
    	print(emax)
    	print(maxconc)
    	print(z)
    	browser()
    }
    z[z<3] <- 0
    z[z>3] <- 1
    top[top>1000] <- 0
    w[w>1000] <- 1
    top <- top/100
    top[top>1] <- 1
    cr.mat <- matrix(nrow=length(CONCLIST),ncol=NASSAY)
    cr.mat[] <- 0
    for(i in 1:length(CONCLIST)) {
        conc <- CONCLIST[i]
        for(j in 1:NASSAY) {
            ac50j <- as.numeric(ac50[j])
            tj <- as.numeric(top[j])
            wj <- as.numeric(w[j])
            emaxj <- as.numeric(emax[j])
            zj <- as.numeric(z[j])
            if(is.na(emaxj)) emaxj <- 0
            cr.mat[i,j] <- tj*(conc**wj/(conc**wj+ac50j**wj))
            if(ac50[j]==2000000) cr.mat[i,j] <- NA
        }
    }

    fname <- paste(dir,"/CRMAT_",code,".txt",sep="")
    write.table(cr.mat,fname,sep="\t",row.names=F)
    return(cr.mat)
}
#--------------------------------------------------------------------------------------
#
# Calculate the least squares solution for all reference chemicals
#
#--------------------------------------------------------------------------------------
LSref <- function(to.file=F,istart=1,do.debug=F) {
    dir <- "../input/CRref"
    Tmat.va()
    par(mfrow=c(3,2),mar=c(4,4,4,1))
    if(to.file) {
        fname <- paste("../plots/refchem_perf_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
        par(mfrow=c(3,2),mar=c(4,4,4,1))
    }
    fname.data <- paste("../output/refchem_resmat_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    s <- paste("CODE\tName")

    cat("",file=fname.data,append=F)

    nreceptor <- NRECEPTOR
    nmax <- dim(REFCHEMS)[1]
    for(i in istart:nmax) {
        iuse <- i
        code <- REFCHEMS[iuse,1]
        sname <- CHEMS[code,"Name"]
        cat(code,"[",sname,"] ",i,":",nmax,"\n")
        flush.console()
        potency.string <- ""
        if(!is.na(REFCHEMS[iuse,4])) {
        	potency.string <- paste("Agonist: ",REFCHEMS[iuse,4],sep="")
        	if(!is.na(REFCHEMS[iuse,5])) potency.string <- paste(potency.string,"\nAntagonist: ",REFCHEMS[iuse,5],sep="")
      	}
      	else {
      		potency.string <- paste("Antagonist: ",REFCHEMS[iuse,5],sep="")
      	}
		resmat <- LSone.va(code,sname,potency.string,dir=dir,xmin.plot=0.000001,do.debug)
        s <- paste(code,REFCHEMS[iuse,3],sep="\t")
        for(j in 1:nreceptor) {
            for(k in 1:NCONC) s <- paste(s,resmat[k,j],sep="\t")
        }
        cat(paste(s,"\n",sep=""),file=fname.data,append=T)
        if(!to.file) browser()
    }
    if(to.file) dev.off()
}
#--------------------------------------------------------------------------------------
#
# Calculate the least squares solution for all chemicals
#
#--------------------------------------------------------------------------------------
LSall <- function(to.file=F,istart=1,do.debug=F) {
    dir <- "../input/CRall"
    Tmat.va()
    par(mfrow=c(3,2),mar=c(4,4,4,1))
    if(to.file) {
        fname <- paste("../plots/allchem_perf_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
        par(mfrow=c(3,2),mar=c(4,4,4,1))
    }
	fname.data <- paste("../output/allchem_resmat_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    cat("",file=fname.data,append=F)
    nmax <- dim(CHEMS)[1]
    nreceptor <- NRECEPTOR
    for(i in istart:nmax) {
        iuse <- i
        code <- CHEMS[iuse,"CODE"]
        sname <- CHEMS[code,"Name"]
        cat(code,"[",sname,"] ",i,":",nmax,"\n")
        flush.console()
        resmat <- LSone.va(code,sname,"",dir,do.debug)
        s <- paste(code,sname,sep="\t")
        for(j in 1:nreceptor) {
            for(k in 1:NCONC) s <- paste(s,resmat[k,j],sep="\t")
        }
        cat(paste(s,"\n",sep=""),file=fname.data,append=T)
        if(!to.file && i%%3==0) browser()
    }
    if(to.file) dev.off()
}
#--------------------------------------------------------------------------------------
#
# Calculate the least squares solution for one chemical
#
#===============================================================================
# Pathway-specific - update pathway-specific line color, etc. behavior
#===============================================================================
#--------------------------------------------------------------------------------------
LSone.va <- function(code="C80057",cname="BPA",strength="Strong",dir="../input/CRref",do.debug=F,xmin.plot=0.01) {
    Tmat.va()
    file <- paste(dir,"/CRMAT_",code,".txt",sep="")
    adata <- read.table(file,header=T,sep="\t")

    casrn <- CHEMS[code,"CASRN"]
    nconc <- length(CONCLIST)
    nassay <- NASSAY
    rmat <- matrix(nrow=nconc,ncol=nassay)
    rmat[] <- 0
    adata <- as.data.frame(t(adata))
    adata <- cbind(adata,TMAT)
    conc.names <- c()
    for(i in 1:NCONC) conc.names <- c(conc.names,paste("C",i,sep=""))
    t.names <- c()
    for(i in 1:NRECEPTOR) t.names <- c(t.names,paste("T",i,sep=""))
    names(adata) <- c(conc.names,t.names)
    nuse <- NCONC

    qc.string <- paste("Chemical QC [")
    qc.temp <- as.data.frame(TOX21.QC[is.element(TOX21.QC[,"CODE"],code),])
    if(dim(qc.temp)[1]>0) {
        for(q in 1:dim(qc.temp)[1]) qc.string <- paste(qc.string,qc.temp[q,"grade"])
    }
    qc.string <- paste(qc.string,"]")
    qc.string=""
                                        # plot the interpolated assay values
    temp <- adata[1,1:NCONC]
    a1 <- as.numeric(temp[1:nuse])
    xlist <- CONCLIST
    cols <- c("black","black","black","green","green","green","green","green","green","gray","gray","hotpink","hotpink","cyan","cyan","yellow4","orange","orange","khaki3","khaki3")
    anames <- c()
    for(i in 1:NASSAY) anames <- c(anames,paste("A",i,sep=""))
    ltys <- c(1,2,3, 1,1,2,2,3,3, 1,2, 1,2, 1,2,1, 1,2, 1,2)
    plot(a1~xlist,xlim=c(xmin.plot,200),log="x",ylim=c(0,1),cex.lab=1.2,cex.axis=1.2,xlab="Concentration (uM)",ylab="Efficacy - fraction(E2)",lwd=3,col=cols[1],lty=ltys[1],main=paste(casrn,":",cname,"\n",qc.string),type="n")
    #text(x=xmin.plot,y=0.9,strength,cex=1,pos=4)
    counter <- 1

    cytotox.median <- CYTOTOX[code,"cytotox_median_um"]
    cytotox.lower <- CYTOTOX[code,"cytotox_lower_bound_um"]
    
    if(cytotox.median<1000) {
    	rect(cytotox.lower,0,1000,1,col="gray80",border="black")
        lines(c(cytotox.median,cytotox.median),c(0,1),lwd=3,col="red")
    }
    lines(c(1e-6,1e6),c(1,1),col="black",lwd=1)

    cautions.by.code <- CAUTION.FLAGS[is.element(CAUTION.FLAGS[,"CODE"],code),]

    for(i in 1:nassay) {
        assay <- ASSAY.LIST[i]
        max.conc <- MAT.MAXCONC[code,assay]
        temp <- adata[i,1:NCONC]
        ai <- as.numeric(temp[1:nuse])
    	cautions.by.assay <- as.data.frame(cautions.by.code[is.element(cautions.by.code[,"Assay"],assay),])
    	activity <- MAT.AC50.DISC[code,assay]
        if(!is.na(max.conc)) {
            iuse0 <- interpolate.conc(xlist,ai,max.conc)$iuse
            ai <- as.numeric(temp[1:iuse0])
            xi <- xlist[1:iuse0]
        }
        else {
            xi <- xlist
        }
        if(activity==1 && dim(cautions.by.assay)[1]>0) {
            clist <- cautions.by.assay[,"flag"]
            doit <- F
            pch <- 4
            if(is.element("Multiple points above baseline, inactive",clist)) {doit <- T; pch <- 2}
            if(is.element("Borderline inactive",clist)) {doit <- T; pch <- 2}
            if(is.element("Borderline active",clist)) {doit <- T; pch <- 4}
            if(is.element("Only one conc above baseline, active",clist)) {doit <- T; pch <- 4}
            if(is.element("Only highest conc above baseline, active",clist)) {doit <- T; pch <- 4}
            if(is.element("Gain AC50 < lowest conc & loss AC50 < mean conc",clist)) {doit <- T; pch <- 4}
            if(is.element("Includes potential flare region points",clist)) {doit <- T; pch <- 8}
            if(is.element("Includes potential chemical plate interlace points",clist)) {doit <- T; pch <- 8}

            lines(ai~xi,lwd=2,col=cols[i],lty=ltys[i])
            if(doit) {
                print(cautions.by.assay[,c("Name","Assay","flag")])
                flush.console()
                points(ai~xi,col="black",pch=pch,cex=1)
            }
        }
        else {
            lines(ai~xi,lwd=2,col=cols[i],lty=ltys[i])
        }
    }
    #------------------------------------------
    # now plot the receptor curves
    #------------------------------------------
    nreceptor <- NRECEPTOR
    resmat <- as.data.frame(matrix(nrow=NCONC,ncol=nreceptor))
    allrnames <- c()
    for(i in 1:NRECEPTOR) allrnames <- c(allrnames,paste("R",i,sep=""))
    names(resmat) <- allrnames
    resmat[] <- 0
    start <- vector(mode="numeric",length=nreceptor)
    lwr <- vector(mode="numeric",length=nreceptor)
    upr <- vector(mode="numeric",length=nreceptor)
    start[] <- 0
    lwr[] <- 0
    upr[] <- 1
    for(i in 1:nuse) {
        concname <- paste("C",i,sep="")
        A <- adata[,c(concname,t.names)]
        if(i>1) start <- res$par
        if(do.debug) res <- optim(par=start,f=AFR.va,A=A,method="L-BFGS-B",lower=lwr,upper=upr,control=list(maxit=2000))
        else res <- optim(par=start,f=AFR.va,A=A,method="L-BFGS-B",lower=lwr,upper=upr,control=list(maxit=2000))        #cat(concname,"  eps/alpha: ",format(res$value/ALPHA,digits=2),"\n")
        
        for(j in 1:nreceptor) resmat[i,j] <- res$par[j]*AUCSCALE1
        if(res$convergence!=0 || do.debug) cat(i,"Convergence: ",res$convergence," Calls: ",res$counts," residual: ",res$value," : ",res$message,"\n")
   		if(do.debug) print(res$par)	
   	}
                                        # plot the predicted R values
    ltys <- c( 1,     1,     1,       1,      1,     2,        1,     1,       1,       2,       1,       2)
    cols <- c("blue","red", "black", "green", "gray","hotpink","cyan","yellow4" ,"orange","orange","khaki3","khaki3")
    rnames <- c()
    for(i in 1:NRECEPTOR) rnames <- c(rnames,paste("R",i,sep=""))
    aucr1 <- receptor.score(resmat[,1])
    aucr2 <- receptor.score(resmat[,2])

    ac50 <- pseudo.AC(code,"AC50",aucr1,aucr2)$median.value

    lines(c(ac50,ac50),c(0,1),col="green",lwd=3)

    subtitle <- paste("Agonist:",format(aucr1,digits=2)," Antagonist:",format(aucr2,digits=2))
    plot(resmat[1:nuse,1]~xlist,xlim=c(xmin.plot,200),log="x",ylim=c(0,1),cex.lab=1.2,cex.axis=1.2,xlab="Concentration (uM)",ylab="Receptor Score",type="n",lwd=3,col="blue",main=paste(casrn,":",cname,"\n",subtitle))
    cytotox.median <- CYTOTOX[code,"cytotox_median_um"]
    cytotox.lower <- CYTOTOX[code,"cytotox_lower_bound_um"]
    if(cytotox.median<1000) {
    	rect(cytotox.lower,0,1000,1,col="gray80",border="black")
        lines(c(cytotox.median,cytotox.median),c(0,1),lwd=3,col="red")
    }
    lines(c(ac50,ac50),c(0,1),col="green",lwd=3)
    lines(c(1e-6,1e6),c(1,1),col="black",lwd=1)

    for(i in 1:NRECEPTOR0) lines(resmat[1:nuse,i]~xlist,lwd=3,col=cols[i],lty=ltys[i])
    text(x=xmin.plot,y=0.85,strength,cex=1,pos=4)

    if(do.debug) browser()

    return(resmat)
}
#--------------------------------------------------------------------------------------
#
# Error function for contraint problem
#
#--------------------------------------------------------------------------------------
AFR.va <- function(x,A) {
    Ameas <- A[,1]
    nreceptor <- NRECEPTOR
    nassay <- NASSAY
    F <- as.matrix(A[,2:(nreceptor+1)])
    R <- matrix(nrow=nreceptor,ncol=1)
    R[] <- x
    Apred <- F%*%R
    w <- vector(mode="numeric",length=NASSAY)
    w[] <- 1
    w[1] <- 1/3
    w[2] <- 1/3
    w[3] <- 1/3
    w[4] <- 1/6
    w[5] <- 1/6
    w[6] <- 1/6
    w[7] <- 1/6
    w[8] <- 1/6
    w[9] <- 1/6
    w[10] <- 1/2
    w[11] <- 1/2
    w[12] <- 1/2
    w[13] <- 1/2
    w[14] <- 1/2
    w[15] <- 1/2
    w[16] <- 1/1
    w[17] <- 1/2
    w[18] <- 1/2

    #w[] <- 1
    
    eret <- 0
    bot <- 0
    top <- 0
    mask <- Ameas
    mask[] <- 1
    mask[is.na(Ameas)] <- 0
    for(i in 1:nassay) {
        if(mask[i]==1) {
            top <- top + w[i] * (Apred[i]-Ameas[i])**2
            bot <- bot + w[i]
        }
    }
    eret <- top/bot/sum(mask) + penalty(R)
    return(eret)
}
#--------------------------------------------------------------------------------------
#
# penalty term
#
#--------------------------------------------------------------------------------------
penalty <- function(x) {
	if(PENALTY.METHOD=="RIDGE") value <- ALPHA * sum(x*x) # ridge regression
	if(PENALTY.METHOD=="LASSO") value <- ALPHA * sum(abs(x)) # LASSO regression
	if(PENALTY.METHOD=="THRESHOLD") {
		sumx <- sum(x)
		a <- sumx**10
		b <- 0.5**10
		value <- ALPHA * a/(a+b)
	}
	return(value)
}
#--------------------------------------------------------------------------------------
#
# do the hierarchical clustering on the R matrix
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
RHM <- function(mode="ref",to.file=F) {
    if(to.file) {
        fname <- paste("../plots/",mode,"chem_heatmap_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    file <- paste("../output/",mode,"chem_resmat_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    cmethod <- HEATMAP.CMETHOD
    nlevel <- 25

    rdata <- read.table(file,sep="\t",header=F,stringsAsFactors=F,comment.char="\"",quote="")

    nchem <- dim(rdata)[1]
    nconc <- 13
    dmat <- matrix(nrow=nchem,ncol=NRECEPTOR*(nconc-1))
    counter <- 0

    dmat <- rdata[,3:dim(rdata)[2]]
    dmat <- dmat[,1:(NRECEPTOR0*NCONC)]

    if(mode=="ref") {
    	color.list <- vector(length=dim(REFCHEMS)[1],mode="character")
    	color.list <- REFCHEMS[,"Agonist.Potency"]
    	color.list[is.element(color.list,"Strong")] <- "red"
    	color.list[is.element(color.list,"Weak")] <- "yellow"
    	color.list[is.element(color.list,"Very Weak")] <- "green"
    	color.list[is.element(color.list,"Very weak")] <- "green"
    	color.list[is.element(color.list,"Moderate")] <- "orange"
    	color.list[is.element(color.list,"Moderate")] <- "orange"
    	color.list[is.element(color.list,"Inactive")] <- "white"
    	color.list[is.element(color.list,"Antagonist")] <- "black"
    	color.list[is.element(color.list,"Antagonist Inactive")] <- "white"
    	color.list[is.element(color.list,"Cytostatic")] <- "gray"
	}
	main <- "Normalized Response by Concentration"

    if(mode=="ref") {
	    heatmap(as.matrix(dmat),margins=c(5,10),scale="none",labRow=rdata[,2],Colv=NA,revC=T,
	            xlab="",ylab="",cexCol=0.1,cexRow=1,col=brewer.pal(9,"Reds"),
	            hclustfun=function(x) hclust(d=dist(x),method=cmethod),RowSideColors=color.list,main=main)
	}
	else {
	    heatmap(as.matrix(dmat),margins=c(5,10),scale="none",labRow=rdata[,2],Colv=NA,revC=T,
	            xlab="",ylab="",cexCol=0.1,cexRow=1,col=brewer.pal(9,"Reds"),
	            hclustfun=function(x) hclust(d=dist(x),method=cmethod),main=main)
	}
   	if(to.file) dev.off()
    cl <- hclust(d=dist(dmat),method=cmethod)
    hcut <- 1
    clcut <- cutree(cl,h=hcut)
    clout <- cbind(clcut,clcut)
    clout <- cbind(clout,clcut)
    clout<- as.data.frame(clout)
    for(i in 1:length(clcut)) {
        clout[i,1] <- rdata[i,1]
        clout[i,2] <- rdata[i,2]
    }
    names(clout) <- c("CODE","Name","Level_1")

    cat("Finished preping clusters for hcut: ",hcut,"\n")
    flush.console()

    for(hcut in 2:nlevel) {
        clcut <- cutree(cl,h=hcut)
        clout <- cbind(clout,clcut)
        names(clout)[dim(clout)[2]] <- paste("Level_",hcut,sep="")
        cat("Finished preping clusters for hcut: ",hcut,"\n")
        flush.console()
    }
   	outfile <- paste("../output/",mode,"chem_clusters_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    write.table(clout,file=outfile, row.names=F, append=FALSE, quote=F, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# calculate the area under the curve
#
#--------------------------------------------------------------------------------------
AUCcalc <- function(mode="ref") {
    file <- paste("../input/RotroffModel.txt",sep="")
    rotroff <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
	rownames(rotroff)<- rotroff[,"CODE"]

    file <- paste("../output/",mode,"chem_resmat_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    cmethod <- HEATMAP.CMETHOD
    nlevel <- 25
    rdata <- read.table(file,sep="\t",header=F,stringsAsFactors=F,comment.char="",quote="")
    print(dim(rdata))
    fname.data <- paste("../output/",mode,"chem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    cat("",file=fname.data,append=F)
    s <- "CODE\tName"
    for(i in 1:NRECEPTOR) s <- paste(s,"\tR",i,sep="")
	if(PATHWAY=="ER") {
	    s <- paste(s,"\tRotroff_COMPOSITE_SCORE_SCALED_AGONIST",sep="")
	    s <- paste(s,"\tRotroff_COMPOSITE_SCORE_SCALED_ANTAGONIST",sep="")
	    s <- paste(s,"\tRotroff_COMPOSITE_SCORE_SCALED_BINDING",sep="")
	    s <- paste(s,"\tRotroff_COMPOSITE_SCORE_SCALED_GROWTH",sep="")
	    s <- paste(s,"\tRotroff_INTERACTION_SCORE",sep="")
	}
    s <- paste(s,"\n",sep="")
    cat(s,file=fname.data,append=F)
    nmax <- dim(rdata)[1]
    for(i in 1:nmax) {
        code <- rdata[i,1]
        cname <- CHEMS[code,"Name"]
        counter <- 3
        s <- paste(code,"\t",cname,sep="")
        for(j in 1:NRECEPTOR) {
        	istart <- (j-1)*NCONC + 3
        	iend <- j*NCONC +2
        	x <- rdata[i,istart:iend]
            auc <- receptor.score(x,do.print=F)
            s <- paste(s,"\t",format(auc,digits=3),sep="")
        }

		if(PATHWAY=="ER") {
	        v1 <- NA
	        v2 <- NA
	        v3 <- NA
	        v4 <- NA
	        v5 <- NA
	
	        temp <- rotroff[code,]
	        if(dim(temp)[1]==1) {
	        	v1 <- rotroff[code,"COMPOSITE_SCORE_SCALED_AGONIST"]
	        	v2 <- rotroff[code,"COMPOSITE_SCORE_SCALED_ANTAGONIST"]
	        	v3 <- rotroff[code,"COMPOSITE_SCORE_SCALED_BINDING"]
	        	v4 <- rotroff[code,"COMPOSITE_SCORE_SCALED_GROWTH"]
	        	v5 <- rotroff[code,"INTERACTION_SCORE"]
	        }
	        s <- paste(s,"\t",format(v1,digits=3),"\t",format(v2,digits=3),"\t",format(v3,digits=3),"\t",format(v4,digits=3),"\t",format(v5,digits=3),"\n",sep="")
	    }
		else s <- paste(s,"\n",sep="")
        cat(s)
        flush.console()
        cat(s,file=fname.data,append=T)
    }
}
#--------------------------------------------------------------------------------------
#
# do the hierarchical clustering on the AUC matrix
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
AUCHM <- function(mode="ref",to.file=F,dcut=5) {
    if(to.file) {
        fname <- paste("../plots/",mode,"chem_AUC_heatmap_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    file <- paste("../output/",mode,"chem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    cmethod <- HEATMAP.CMETHOD
    nlevel <- 25

    rdata <- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    nchem <- dim(rdata)[1]
    dmat <- matrix(nrow=nchem,ncol=NRECEPTOR)
    counter <- 0
    dmat <- rdata[,3:dim(rdata)[2]]
    if(mode=="ref") dmat <- dmat[,1:NRECEPTOR0]
    if(mode=="all") dmat <- dmat[,1:NRECEPTOR]
    dmat.d <- dmat
    #dmat.d[dmat.d<1e-4] <- 1e-4
    dmat.d[dmat<SPECIFIC.AUC.CUTOFF] <- 0
    dmat.d[dmat.d>0] <- 1
    rs <- rowSums(dmat.d)
    if(mode=="all") dmat <- dmat[rs>0,]
    cat("getting ready to run heatmap\n")
    flush.console()

    lab.col <- names(dmat)
    lab.col[1] <- "Agonist"
    lab.col[2] <- "Antagonist"
    dmat.plot <- dmat*100
    dmat.plot[dmat.plot<=dcut] <- 1
    dmat.plot <- log10(dmat.plot)
    main <- "log(AUC)"
    if(mode=="ref") {
    	color.list <- vector(length=dim(REFCHEMS)[1],mode="character")
        color.list[] <- "white"

        color.list[is.element(REFCHEMS[,"Agonist.Potency"],"Inactive")] <- "gray"
        color.list[is.element(REFCHEMS[,"Agonist.Potency"],"Very Weak")] <- "green"
        color.list[is.element(REFCHEMS[,"Agonist.Potency"],"Weak")] <- "yellow"
        color.list[is.element(REFCHEMS[,"Agonist.Potency"],"Moderate")] <- "orange"
        color.list[is.element(REFCHEMS[,"Agonist.Potency"],"Strong")] <- "red"
        color.list[is.element(REFCHEMS[,"Antagonist.Potency"],"Active")] <- "black"

        heatmap(as.matrix(dmat.plot),margins=c(5,12),scale="none",labRow=rdata[,2],labCol=lab.col,Colv=NA,revC=T,
                xlab="",ylab="",RowSideColors=color.list,cexCol=1.2,cexRow=0.8,col=brewer.pal(9,"Reds"),main=main,
                hclustfun=function(x) hclust(d=dist(x),method=cmethod),verbose=T)
    }
    else {
    	for(i in 1:NRECEPTOR) {
    		receptor <- paste("R",i,sep="")
    		lab.col[i] <- ar.index(receptor)$nickname
		}
        nc <- dim(dmat.plot)[1]
        main <- paste("log(AUC) chems:",nc)

        heatmap(as.matrix(dmat.plot),margins=c(5,5),scale="none",labRow="",labCol=lab.col,Colv=NA,revC=T,
                xlab="",ylab="",cexCol=1,cexRow=0.1,col=brewer.pal(9,"Reds"),main=main,
                hclustfun=function(x) hclust(d=dist(x),method=cmethod))
    }
    if(to.file) dev.off()
    cat("heatmap done\n")
    cl <- hclust(d=dist(dmat),method=cmethod)
    hcut <- 1
    clcut <- cutree(cl,h=hcut)
    clout <- cbind(clcut,clcut)
    clout <- cbind(clout,clcut)
    clout<- as.data.frame(clout)
    for(i in 1:length(clcut)) {
        clout[i,1] <- rdata[i,1]
        clout[i,2] <- rdata[i,2]
    }
    names(clout) <- c("CODE","Name","Level_1")

    cat("Finished preping clusters for hcut: ",hcut,"\n")
    flush.console()

    for(hcut in 2:nlevel) {
        clcut <- cutree(cl,h=hcut)
        clout <- cbind(clout,clcut)
        names(clout)[dim(clout)[2]] <- paste("Level_",hcut,sep="")
        cat("Finished preping clusters for hcut: ",hcut,"\n")
        flush.console()
    }
    outfile <- paste("../output/",mode,"chem_AUC_clusters_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    write.table(clout,file=outfile, row.names=F, append=FALSE, quote=F, sep = "\t")

    if(!to.file) browser()
}
#--------------------------------------------------------------------------------------
#
# receptor score or AUC
#
#--------------------------------------------------------------------------------------
interpolate.conc <- function(x,y,x0) {
	nx <- length(x)
	if(x0>=x[nx]) {
		y0 <- y[nx]
		iuse <- nx
	}
	else {
		iuse <- which.max(x0<x)
		if(iuse==0) y0 <- 0
		else if(iuse>=nx) y0 <- y[nx]
		else {
			y0 <- y[iuse]+(y[iuse+1]-y[iuse])*(log10(x0)-log10(x[iuse])) / (log10(x[iuse+1])-log10(x[iuse]))
		}
	}
	return(list(iuse=iuse,y0=y0))
}
#--------------------------------------------------------------------------------------
#
# receptor score or AUC
#
#--------------------------------------------------------------------------------------
receptor.score <- function(x,do.print=F) {
	nuse <- length(x)
	if(nuse<2) return(0)
	if(do.print) {
		cat("==========================================\n")
		print(x)
	}
	score <- x[1]
	for(i in 2:nuse) {
		slope.sign <- 1
		delta <- x[i]-x[i-1]
		if(delta < -0.01) slope.sign <- -1
		score <- score + slope.sign*x[i]
		if(do.print) cat("   ",i,"delta: ",format(delta,digits=3),"sign: ",slope.sign," x:",format(x[i],digits=3),"\n")
	}
	score <- score/nuse
	if(score<0) score <- 0
	if(do.print) cat("Score: ",format(score,digits=3),"\n")
	score <- score*AUCSCALE2
	return(score)
}
#--------------------------------------------------------------------------------------
#
# generic transfer function
#
#--------------------------------------------------------------------------------------
transfer <- function(x,w=1,k=0.5) {
	a <- x**(10*w)
	b <- k**(10*w)
	tf <- a/(a+b)
	return(tf)
}
#--------------------------------------------------------------------------------------
#
# plot the range of AUC in the agonist channel (R1) as a function of expected potency
#
#--------------------------------------------------------------------------------------
refchem.dist <- function(to.file=F) {
    file <- paste("../output/refchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata <- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    bnames <- c("Inactive","Very Weak","Weak","Moderate","Strong")
    nbins <- length(bnames)
    y <- rdata[,"R1"]
    groups <- y
    groups[] <- 0
    refout <- REFCHEMS
    refout <- cbind(refout,refout[,1])
    names(refout)[dim(refout)[2]] <- "Agonist.AUC"
    refout <- cbind(refout,refout[,1])
    names(refout)[dim(refout)[2]] <- "Antagonist.AUC"
    refout[,"Agonist.AUC"] <- 0
    refout[,"Antagonist.AUC"] <- 0

    for(i in 1:dim(rdata)[1]) {
        chem <- rdata[i,1]
        expect <- as.character(REFCHEMS[is.element(REFCHEMS[,1],chem),"Agonist.Potency"])
        if(!is.na(expect)) {
            if(expect=="Inactive") groups[i] <- 1
            if(expect=="Very Weak") groups[i] <- 2
            if(expect=="Weak") groups[i] <- 3
            if(expect=="Moderate") groups[i] <- 4
            if(expect=="Strong") groups[i] <- 5
        }
        R1 <- rdata[i,"R1"]
        R2 <- rdata[i,"R2"]
        refout[which.max(refout[,1]==chem),"Agonist.AUC"] <- R1
        refout[which.max(refout[,1]==chem),"Antagonist.AUC"] <- R2
    }
    outfile <- paste("../output/refchem_scored_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    write.table(refout,file=outfile, row.names=F, append=FALSE, quote=F, sep = "\t")

    y <- y[groups>0]
    groups <- groups[groups>0]

    if(to.file) {
        fname <- paste("../plots/refchem_activity_ranges_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(2,1),mar=c(4,4,2,0.1))
    main <- "Agonist Score (R1) vs. Reference Activity Class"
    boxplot(y~groups,ylab="Agonist Score",xlab="Activity Class",names=bnames,cex.axis=1.2,cex.lab=1.2,main=main)
    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# lane plot of AUC for reference chemicals
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
refchem.laneplot <- function(to.file=F) {
    if(to.file) {
        fname <- paste("../plots/refchem_laneplot_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=8,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    file <- paste("../output/refchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata <- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    nchem <- dim(rdata)[1]
    dmat <- matrix(nrow=nchem,ncol=NRECEPTOR0)
    counter <- 0
    dmat <- rdata[,3:dim(rdata)[2]]
    dmat <- dmat[,1:NRECEPTOR0]
    
    dmat[dmat<0.01] <- 0.01
    
   	pot.agon <- REFCHEMS[,"Agonist.Potency"]
   	pot.agon <- pot.agon[!is.na(pot.agon)]
   	nagon <- length(pot.agon)
    
    plot(1~1,xlim=c(0,2.2),ylim=c(0,40),xlab="AUC",ylab="",type="n",main="Agonist",yaxt="n",cex.axis=0.8)
 	x <- 1
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	
 	order.list <- sort(dmat[,"R1"],index.return=T)$ix
    counter <- 0
    for(i in 1:nchem) {
    	cname <- REFCHEMS[order.list[i],"Chemical.Name"]
    	pot.agon <- REFCHEMS[order.list[i],"Agonist.Potency"]
    	v1 <- 1e-6
    	v2 <- 1e-6
    	if(!is.na(pot.agon)) {
    		counter <- counter+1
    		v1 <- dmat[order.list[i],"R1"]
    		v2 <- max(dmat[order.list[i],])
    		col="green"
    		if(pot.agon=="Inactive") col <- "red"
    		points(v1,counter,pch=21,bg=col,fg="black",cex=1.5)
    		if(v2>v1*2 && v2>0.1) {
	    		points(v2,counter,pch=4)
    			ival <-  which.max( is.element(dmat[order.list[i],],max(dmat[order.list[i],])) )
				rval <- names(dmat)[ival]
    			text(v2,counter,rval,pos=4,cex=0.8)
    		}
    		text(1.1,counter,pot.agon,pos=4,cex=0.8)
    		text(1.5,counter,cname,pos=4,cex=0.8)
    	}
    
    }
    if(!to.file) browser()

   	pot.antagon <- REFCHEMS[,"Antagonist.Potency"]
   	pot.antagon <- pot.agon[!is.na(pot.antagon)]
   	nantagon <- length(pot.antagon)
    
    delta <- nantagon/nagon
    plot(1~1,,xlim=c(0,2.2),ylim=c(0,40),xlab="AUC",ylab="",type="n",main="Antagonist",yaxt="n",cex.axis=0.8)
 	x <- 1
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x-0.2
  	order.list <- sort(dmat[,"R1"],index.return=T)$ix
  	dmat <- dmat[order.list,]
  	refchems <- REFCHEMS[order.list,]
  	order.list <- sort(dmat[,"R2"],index.return=T)$ix
	
	lines(c(-1e6,1e6),c(nantagon+1,nantagon+1))
    counter <- 0
    for(i in 1:nchem) {
    	cname <- refchems[order.list[i],"Chemical.Name"]
    	pot.agon <- refchems[order.list[i],"Antagonist.Potency"]
    	v1 <- 1e-6
    	v2 <- 1e-6
    	if(!is.na(pot.agon)) {
    		counter <- counter+1
    		v1 <- dmat[order.list[i],"R2"]
    		v2 <- max(dmat[order.list[i],])
    		col="green"
    		if(pot.agon=="Inactive") col <- "red"
    		points(v1,counter,pch=21,bg=col,fg="black",cex=1.5)
    		if(v2>v1*2 && v2>0.1) {
	    		points(v2,counter,pch=4)
    			ival <-  which.max( is.element(dmat[order.list[i],],max(dmat[order.list[i],])) )
				rval <- names(dmat)[ival]
    			text(v2,counter,rval,pos=4,cex=0.8)
    		}
    		text(1.1,counter,pot.agon,pos=4,cex=0.8)
    		text(1.5,counter,cname,pos=4,cex=0.8)
    	}
    
    }

    if(!to.file) browser()
    else dev.off()
}
#--------------------------------------------------------------------------------------
#
# lane plot of AUC for reference chemicals
#
#===============================================================================
# Pathway-specific - update pathway-specific assay behavior
#===============================================================================
#--------------------------------------------------------------------------------------
refchem.laneplot.log <- function(to.file=F) {
    if(to.file) {
        fname <- paste("../plots/refchem_log_laneplot_",PENALTY.METHOD,"_",ALPHA,".pdf",sep="")
        pdf(file=fname,width=7,height=8,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    file <- paste("../output/refchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata <- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    nchem <- dim(rdata)[1]
    dmat <- matrix(nrow=nchem,ncol=NRECEPTOR0)
    counter <- 0
    dmat <- rdata[,3:dim(rdata)[2]]
    dmat <- dmat[,1:NRECEPTOR0]
    
    dmat[dmat<0.001] <- 0.001
    
   	pot.agon <- REFCHEMS[,"Agonist.Potency"]
   	pot.agon <- pot.agon[!is.na(pot.agon)]
   	nagon <- length(pot.agon)
    
    plot(1~1,xlim=c(1e-3,1000),ylim=c(0,40),xlab="AUC",ylab="",type="n",main="Agonist",yaxt="n",cex.axis=0.8,log="x",xaxp=c(0.0001,1,1))
 	x <- 1
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	
 	order.list <- sort(dmat[,"R1"],index.return=T)$ix
    counter <- 0
    for(i in 1:nchem) {
    	cname <- REFCHEMS[order.list[i],"Chemical.Name"]
    	pot.agon <- REFCHEMS[order.list[i],"Agonist.Potency"]
    	v1 <- 1e-6
    	v2 <- 1e-6
    	if(!is.na(pot.agon)) {
    		counter <- counter+1
    		v1 <- dmat[order.list[i],"R1"]
    		v2 <- max(dmat[order.list[i],])
    		col="green"
    		if(pot.agon=="Inactive") col <- "red"
    		points(v1,counter,pch=21,bg=col,fg="black",cex=1.5)
    		if(v2>v1*2 && v2>0.1) {
	    		points(v2,counter,pch=4)
    			ival <-  which.max( is.element(dmat[order.list[i],],max(dmat[order.list[i],])) )
				rval <- names(dmat)[ival]
    			text(v2,counter,rval,pos=4,cex=0.8)
    		}
    		text(1.1,counter,pot.agon,pos=4,cex=0.8)
    		text(10,counter,cname,pos=4,cex=0.8)
    	}
    
    }
    if(!to.file) browser()

   	pot.antagon <- REFCHEMS[,"Antagonist.Potency"]
   	pot.antagon <- pot.agon[!is.na(pot.antagon)]
   	nantagon <- length(pot.antagon)
    
    delta <- nantagon/nagon
    plot(1~1,xlim=c(1e-3,1000),ylim=c(0,40),xlab="AUC",ylab="",type="n",main="Antagonist",yaxt="n",cex.axis=0.8,log="x",xaxp=c(0.0001,1,1))
 	x <- 1
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
 	lines(c(x,x),c(0,nagon),col="gray"); x <- x/10
  	order.list <- sort(dmat[,"R1"],index.return=T)$ix
  	dmat <- dmat[order.list,]
  	refchems <- REFCHEMS[order.list,]
  	order.list <- sort(dmat[,"R2"],index.return=T)$ix
	
	lines(c(-1e6,1e6),c(nantagon+1,nantagon+1))
    counter <- 0
    for(i in 1:nchem) {
    	cname <- refchems[order.list[i],"Chemical.Name"]
    	pot.agon <- refchems[order.list[i],"Antagonist.Potency"]
    	v1 <- 1e-6
    	v2 <- 1e-6
    	if(!is.na(pot.agon)) {
    		counter <- counter+1
    		v1 <- dmat[order.list[i],"R2"]
    		v2 <- max(dmat[order.list[i],])
    		col="green"
    		if(pot.agon=="Inactive") col <- "red"
    		points(v1,counter,pch=21,bg=col,fg="black",cex=1.5)
    		if(v2>v1*2 && v2>0.1) {
	    		points(v2,counter,pch=4)
    			ival <-  which.max( is.element(dmat[order.list[i],],max(dmat[order.list[i],])) )
				rval <- names(dmat)[ival]
    			text(v2,counter,rval,pos=2,cex=0.8)
    		}
    		text(1.1,counter,pot.agon,pos=4,cex=0.8)
    		text(10,counter,cname,pos=4,cex=0.8)
    	}
    
    }

    if(!to.file) browser()
    else dev.off()
}
#--------------------------------------------------------------------------------------
#
# Calculate the EAS quantity for each chemical
#
#--------------------------------------------------------------------------------------
EAScalc <- function(mode="FP",receptor="R1",cutoff1=SPECIFIC.AUC.CUTOFF,cutoff2=0,eas.cutoff1=5,eas.cutoff2=15) {
    cat("EAScalc: ",receptor,"\n")
    flush.console()
    if(mode=="FP") {
    	if(!exists("DMAT.FP")) {
		    file <- "../input/structure_input/ToxCast_Tanimoto_matrix_REDUCED_2013_03_05.txt"
		    temp <- read.table(file,sep="\t",header=T,check.names=F,quote="",comment.char="",stringsAsFactors=F)
		    DMAT.FP <<- temp
		    cat("DMAT.FP",dim(DMAT.FP),"\n")    	
    	}
    	dmat <- DMAT.FP
    }
    if(mode=="CT") dmat <- DMAT.CT

	if(!exists("AUC")) {
	    file <- paste("../output/allchem_AUC_",PENALTY.METHOD,"_",ALPHA0,".txt",sep="")
		temp <- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
		AUC <<- temp
	}
	
    mask <- as.data.frame(AUC[,receptor])
    maskA <- AUC[AUC[,receptor]>=cutoff1,"CODE"]
    maskB <- AUC[AUC[,receptor]<=cutoff2,"CODE"]
    nchem <- dim(AUC)[1]
    score <- as.data.frame(matrix(nrow=nchem,ncol=8+NRECEPTOR))
    rnames <- c()
    for(i in 1:NRECEPTOR) rnames <- c(rnames,paste("R",i,sep=""))
    names(score) <- c("CODE","Name",rnames,"p.ttest","p.wilcox","EAS.score","EAS.class","KNN.score","KNN.class")
    for(i in 1:nchem) {
        code <- AUC[i,"CODE"]
        cname <- AUC[i,"Name"]
        val <- AUC[i,receptor]
        if(val<1e-4) val <- 0
        score[i,"CODE"] <- code
        score[i,"Name"] <- cname
        for(j in 1:NRECEPTOR) {
        	column <- paste("R",j,sep="")
        	score[i,column] <- AUC[i,column]
        }
        score[i,"p.ttest"] <- -1
        score[i,"p.wilcox"] <- -1
        score[i,"EAS.score"] <- 0
        score[i,"EAS.class"] <- ""
        score[i,"KNN.class"] <- ""
        tmaskA <- maskA[!is.element(maskA,code)]
        tmaskB <- maskB[!is.element(maskB,code)]
        tmaskA <- tmaskA[is.element(tmaskA,row.names(dmat))]
        tmaskB <- tmaskB[is.element(tmaskB,row.names(dmat))]

        tdA <- dmat[tmaskA,code]
        tdB <- dmat[tmaskB,code]
        if(sum(tdA)*sum(tdB)>0) {
            pttest <- t.test(tdA,tdB,alternative="greater")$p.value
            pwilco <- wilcox.test(tdA,tdB,alternative="greater")$p.value
            eas.score <- -log10(pwilco)
            score[i,"p.ttest"] <- pttest
            score[i,"p.wilcox"] <- pwilco
            score[i,"EAS.score"] <- eas.score
	        if(eas.score<=eas.cutoff1 && val>=cutoff1) score[i,"EAS.class"] <- "EAS low AUC hi"
	      	else if(eas.score>=eas.cutoff2 && val<=cutoff2) score[i,"EAS.class"] <- "EAS hi AUC low"
	        else if(eas.score>=eas.cutoff2 && val>=cutoff2 && val<cutoff1) score[i,"EAS.class"] <- "EAS hi AUC med"
        }
        score[i,"KNN.score"] <- NA
        if(is.element(code,names(dmat))) {
            dnames <- rownames(dmat)
            di <- dmat[,code]
            index <- sort(di,decreasing=T,index.return=T)$ix
            vals <- sort(di,decreasing=T,index.return=T)$x
            index <- index[1:6]
            vals <- vals[1:6]
            codesj <- dnames[index]
            vals <- vals[codesj!=code]
            codesj <- codesj[codesj!=code]
            codesj <- codesj[vals>0.7]
            if(length(codesj)>0) {
                rsj <- AUC[is.element(AUC[,"CODE"],codesj),receptor]
                if(length(rsj)>0) {
                	knn <- max(rsj,na.rm=T)
                	if(knn<1e-3) knn <- 0
                	score[i,"KNN.score"] <- knn
					if(knn<=cutoff2 && val>=cutoff1) score[i,"KNN.class"] <- "KNN low AUC hi"
					else if(knn>=cutoff1 && val<=cutoff2) score[i,"KNN.class"] <- "KNN hi AUC low"
					else if(knn>=cutoff1 && val>=cutoff2 && val<cutoff1) score[i,"KNN.class"] <- "KNN hi AUC med"
				}
            }
    	}
    }
	if(receptor=="R1") EAS.R1 <<- score
	if(receptor=="R2") EAS.R2 <<- score
	
    file <- paste("../output/EAS_",mode,"_",receptor,"_",cutoff1,"_",cutoff2,"_",PENALTY.METHOD,"_",ALPHA0,".txt",sep="")
    write.table(score,file=file, row.names=F, append=FALSE, quote=F, sep="\t")
}
#--------------------------------------------------------------------------------------
#
# prepare the supermatrix
#
#--------------------------------------------------------------------------------------
prep.supermatrix <- function() {
	cat("==========================================\n")
	cat("prep.supermatrix\n")
	cat("==========================================\n")
	flush.console()
    ALPHA <- 1
    file <- paste("../output/allchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata <<- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    ALPHA <- 0.01
    file <- paste("../output/allchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata.min <<- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")
    ALPHA <- 100
	file <- paste("../output/allchem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    rdata.max <<- read.table(file,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="\"")

	if(!exists("EAS.R1")) {
    	file <- "../output/EAS_FP_R1_0.07_0_THRESHOLD_1.txt"
    	temp <- read.table(file,sep="\t",header=T,check.names=F,quote="",comment.char="",stringsAsFactors=F)
    	rownames(temp) <- temp[,"CODE"]
    	EAS.R1 <<- temp
	}
	if(!exists("EAS.R2")) {
    	file <- "../output/EAS_FP_R2_0.07_0_THRESHOLD_1.txt"
    	temp <- read.table(file,sep="\t",header=T,check.names=F,quote="",comment.char="",stringsAsFactors=F)
    	rownames(temp) <- temp[,"CODE"]
    	EAS.R2 <<- temp
	}

    result <- CHEMS
    rownames(result) <- result[,"CODE"]
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "EDSP.Universe"
    result[,"EDSP.Universe"] <- as.integer(is.element(CODE.LIST,EDSP.UNIVERSE[,"CODE"]))

    result <- cbind(result,CYTOTOX[,"cytotox_median_um"])
    names(result)[dim(result)[2]] <- "cytotox_median_um"
    result <- cbind(result,CYTOTOX[,"cytotox_lower_bound_um"])
    names(result)[dim(result)[2]] <- "cytotox_lower_bound_um"
    result <- cbind(result,CYTOTOX[,"nhit"])
    names(result)[dim(result)[2]] <- "cytotox.assays.hit"

	receptor <- "R1"
	colname <- "AUC.Agonist"
    result <- cbind(result,rdata[,receptor])
    names(result)[dim(result)[2]] <- colname
	colname <- "AUC.Agonist.alphamin"
    result <- cbind(result,rdata.min[,receptor])
    names(result)[dim(result)[2]] <- colname
	colname <- "AUC.Agonist.alphamax"
    result <- cbind(result,rdata.max[,receptor])
    names(result)[dim(result)[2]] <- colname

	receptor <- "R2"
	colname <- "AUC.Antagonist"
    result <- cbind(result,rdata[,receptor])
    names(result)[dim(result)[2]] <- colname
	colname <- "AUC.Antagonist.alphamin"
    result <- cbind(result,rdata.min[,receptor])
    names(result)[dim(result)[2]] <- colname
	colname <- "AUC.Antagonist.alphamax"
    result <- cbind(result,rdata.max[,receptor])
    names(result)[dim(result)[2]] <- colname
	
    for(i in 3:NRECEPTOR) {
        receptor <- paste("R",i,sep="")
        result <- cbind(result,rdata[,receptor])
        arindex <- ar.index(receptor)
		nickname <- arindex$nickname
		nickname <- paste("AUC.",nickname,sep="")
        names(result)[dim(result)[2]] <- nickname
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.AC50[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_AC50",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.EMAX[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_Emax",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.T[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_T",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.W[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_W",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,ZSCORE[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_Zscore",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.MAXCONC[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_maxConc",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.AC10[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_AC10",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.ACC[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_ACC",sep="")
    }
    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.ACB[,assay])
        names(result)[dim(result)[2]] <- paste(assay,"_ACB",sep="")
    }

    for(i in 1:NASSAY) {
        assay <- ASSAY.LIST[i]
        result <- cbind(result,MAT.ACB[,assay])
        flag.name <- paste(assay,"_flags",sep="")
        names(result)[dim(result)[2]] <- flag.name
        result[,flag.name] <- 0
        temp <- CAUTION.FLAGS[is.element(CAUTION.FLAGS[,"Assay"],assay),]
        for(j in 1:NCHEM) {
        	code <- CODE.LIST[j]
        	temp2 <- temp[is.element(temp[,"CODE"],code),]
        	result[code,flag.name] <- dim(temp2)[1]
        }
    }

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.AC50.median"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.AC50.min"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "AC50.spread"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.AC10.median"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.AC10.min"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.ACC.median"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.ACC.min"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.ACB.median"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "pseudo.ACB.min"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "maximum.receptor"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "EAS.R1"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "EAS.R1.Class"

    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "EAS.R2"
    result <- cbind(result,result[,dim(result)[2]])
    names(result)[dim(result)[2]] <- "EAS.R2.Class"

	ntop <- 3 + NRECEPTOR - 1
	
	result[,"maximum.receptor"] <- "None"
	temp <- rdata[,3:ntop]
	rs <- rowSums(temp)
	rs[rs<SPECIFIC.AUC.CUTOFF] <- 0
	rs[rs>0] <- 1
	ntop <- 3 + NRECEPTOR - 1
	for(i in 1:NCHEM) {
		code <- CODE.LIST[i]
		if(rs[i]==1) {		# sum of all receptor values has to be at least SPECIFIC.AUC.CUTOFF
			temp <- rdata[i,3:ntop]
			imax <- as.integer(which.max(temp))
			vimax <- temp[imax]
			if(vimax>=SPECIFIC.AUC.CUTOFF) { #  one receptor must reach this value
			
				rtemp <- paste("R",imax,sep="")
				arindex <- ar.index(rtemp)
				receptor <- arindex$nickname

				temp[imax] <- 0
				for(j in 1:length(temp)) {
					if(temp[j]>=SPECIFIC.AUC.CUTOFF) {
						rj <- paste("R",j,sep="")
						arindex <- ar.index(rj)
						nickname <- arindex$nickname
						if(nickname=="Agonist" || receptor=="Agonist") receptor <- "Agonist"
						else if(nickname=="Antagonist" || receptor=="Antagonist") receptor <- "Antagonist"
						#else receptor <- paste(receptor,nickname,sep=".")
					}
				}
				result[i,"maximum.receptor"] <- receptor
			}
		}
	}

    for(i in 1:NCHEM) {
        code <- CODE.LIST[i]
        auc.R1 <- rdata[i,"R1"]
        auc.R2 <- rdata[i,"R2"]
        temp <- pseudo.AC(code,"AC50",auc.R1,auc.R2)
        ac50 <- temp$median.value
        ac50.min <- temp$min.value
        ac50.spread <- temp$spread

        temp <- pseudo.AC(code,"AC10",auc.R1,auc.R2)
        ac10 <- temp$median.value
        ac10.min <- temp$min.value

        temp <- pseudo.AC(code,"ACC",auc.R1,auc.R2)
        acc <- temp$median.value
        acc.min <- temp$min.value

        temp <- pseudo.AC(code,"ACB",auc.R1,auc.R2)
        acb <- temp$median.value
        acb.min <- temp$min.value
                
        if(!is.na(ac50)) {
        	if(is.na(ac50.min)) ac50.min <- ac50/3
            if(ac50<1000000) {
                if(is.na(ac10)) {
                	ac10 <- ac50 / 3
                	ac10.min <- ac50.min / 3
                }
                if(is.na(acc)) {
                	acc <- ac50 / 3
                	acc.min <- ac50.min / 3
				}
                if(is.na(acb)) {
                	acb <- ac50 / 3
                	acb.min <- ac50.min / 3
                }
                if(ac10==1000000) {
                	ac10 <- ac50 / 3
                   	ac10.min <- ac50.min / 3
				}
                if(acb==1000000) {
                	acc <- ac50 / 3
                	acc.min <- ac50.min / 3
                }
                if(acb==1000000) {
                	acb <- ac50 / 3
                	acb.min <- ac50.min / 3
                }
            }
        }
        result[i,"pseudo.AC50.median"] <- ac50
        result[i,"pseudo.AC10.median"] <- ac10
        result[i,"pseudo.ACC.median"] <- acc
        result[i,"pseudo.ACB.median"] <- acb
        result[i,"pseudo.AC50.min"] <- ac50.min
        result[i,"pseudo.AC10.min"] <- ac10.min
        result[i,"pseudo.ACC.min"] <- acc.min
        result[i,"pseudo.ACB.min"] <- acb.min
        result[i,"AC50.spread"] <- ac50.spread
    }
 
 	rownames(EAS.R1) <- EAS.R1[,"CODE"]
 	rownames(EAS.R2) <- EAS.R2[,"CODE"]
 	
    for(i in 1:NCHEM) {
        code <- CODE.LIST[i]
        result[i,"EAS.R1"] <-EAS.R1[code,"EAS.score"]
        result[i,"EAS.R1.Class"] <-EAS.R1[code,"EAS.class"]
        result[i,"EAS.R2"] <-EAS.R2[code,"EAS.score"]
        result[i,"EAS.R2.Class"] <-EAS.R2[code,"EAS.class"]
	}

	result <- cbind(result,NTESTED)
	names(result)[dim(result)[2]] <- "N.assays.tested"
	result <- cbind(result,HITS)
	names(result)[dim(result)[2]] <- "N.assays.hit"
	result <- cbind(result,HITS.Z.HI)
	names(result)[dim(result)[2]] <- "N.assays.hit.hi.Z"
	result <- cbind(result,HITS.Z.LO)
	names(result)[dim(result)[2]] <- "N.assays.hit.lo.Z"
	
	x <- HITS.Z.HI / NTESTED
	y <- HITS.Z.LO / NTESTED
	result <- cbind(result,x)
	names(result)[dim(result)[2]] <- "promiscuity.hi.Z"
	result <- cbind(result,y)
	names(result)[dim(result)[2]] <- "promiscuity.lo.Z"

	
    SUPERMATRIX <<- result
    add.specificity.score()
    SUPERMATRIX <<- cbind(SUPERMATRIX,LITERATURE[,5:10])
    outfile <- "../output/superMatrix.csv"
    write.csv(SUPERMATRIX,file=outfile, row.names=F)
}
#--------------------------------------------------------------------------------------
#
# add the specificity score to the supermatrix
#
#--------------------------------------------------------------------------------------
add.specificity.score <- function() {
	cat("==========================================\n")
	cat("add.specificity.score\n")
	cat("==========================================\n")
	temp <- SUPERMATRIX
	temp <- cbind(temp,temp[,dim(temp)[2]])
	names(temp)[dim(temp)[2]] <- "median.T"
	temp <- cbind(temp,temp[,dim(temp)[2]])
	names(temp)[dim(temp)[2]] <- "median.Z"

	temp[,"median.T"] <- 0
	temp[,"median.Z"] <- 0

	nchem <- dim(temp)[1]
	for(i in 1:nchem) {
		code <- temp[i,"CODE"]
		res <- specificity.scores(code)
		temp[i,"median.T"] <- res$spec.t
		temp[i,"median.Z"] <- res$spec.zscore
	}
	SUPERMATRIX <<- temp
}
#--------------------------------------------------------------------------------------
#
# calculate the specificty.score
#
#--------------------------------------------------------------------------------------
specificity.scores <- function(code) {
	
	specr <- SUPERMATRIX[code,"maximum.receptor"]
	if(specr=="None") return(list(spec.t=0,spec.zscore=0,spec.eas=0,spec.summary=0))
	ac50 <- SUPERMATRIX[code,"pseudo.AC50.median"]
	zmed <- CYTOTOX[code,"cytotox_median_log"]
	zmad <- CYTOTOX[code,"global_mad"]
	logAC50 <- log10(ac50)
	Z <- -(logAC50-zmed) /zmad
	
	t.list <- NULL
	z.list <- NULL
	for(i in 1:NASSAY) {
		ac50name <- paste(ASSAY.LIST[i],"_AC50",sep="")
		ac50 <- as.numeric(SUPERMATRIX[code,ac50name])
		if(ac50<1000000) {
			tname <- paste(ASSAY.LIST[i],"_T",sep="")
			tvalue <- as.numeric(SUPERMATRIX[code,tname])
			if(tvalue>0) t.list <- c(t.list,tvalue)
			zname <- paste(ASSAY.LIST[i],"_Zscore",sep="")
			zvalue <- as.numeric(SUPERMATRIX[code,zname])
			if(tvalue>0) z.list <- c(z.list,zvalue)
		}
	}
	t.value <- 0
	z.value <- 0
	if(length(t.list)>0) {
		t.value <- median(t.list)
		z.value <- median(z.list)
	}
	eas <- SUPERMATRIX[code,"EAS.R1"]
	
	t.cut <- 50
	z.cut <- 3
	eas.cut <- 10
	t.w <- 10.0
	z.w <- 0.2
	eas.w <- 2

	t.val <- t.value
	z.val <- z.value
	eas.val <- sigmoid(eas,eas.cut,eas.w)
	spec.score <- t.val * z.val * eas.val
	return(list(spec.t=t.val,spec.zscore=z.val,spec.eas=eas.val,spec.summary=spec.score))
}
#--------------------------------------------------------------------------------------
#
# Calculate a composite approximate AC50 for a single chemical
#
#===============================================================================
# Pathway-specific - start
#===============================================================================
#------------------------------------------------------------------------------------
pseudo.AC <- function(code,variable="AC50",AUC.R1,AUC.R2) {
    cutoff <- -1000000

    agonist.assays <- c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa","OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440","OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480","ATG_ERa_TRANS_up","ATG_ERE_CIS_up","Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist","ACEA_T47D_80hr_Positive")
    antagonist.assays <- c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa","OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440","OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480","Tox21_ERa_BLA_Antagonist_ratio","Tox21_ERa_LUC_BG1_Antagonist")

    median.agonist <- 1000000
    median.antagonist <- 1000000
    min.agonist <- 1000000
    min.antagonist <- 1000000
    spread <- 0
    if(AUC.R1>cutoff) {
        if(variable=="AC50") temp <- MAT.AC50[code,agonist.assays]
        if(variable=="AC10") temp <- MAT.AC10[code,agonist.assays]
        if(variable=="ACC") temp <- MAT.ACC[code,agonist.assays]
        if(variable=="ACB") temp <- MAT.ACB[code,agonist.assays]
        temp <- temp[!is.na(temp)]
        temp <- temp[!is.nan(temp)]
        temp <- temp[temp>0]
        if(length(temp)>0) {
            temp <- temp[temp<1000000]
	        if(length(temp)>0) {
	            temp <- -log10(temp/1000000)
	            spread <- max(temp)-min(temp)
	            tmedian <- median(temp)
	            if(length(temp)>2) tmad <- mad(temp)
	            else tmad <- 0.5
	            median.agonist <- 1000000 * 10**(-tmedian)
	            min.agonist <- 1000000 * 10**(-(tmedian+3*tmad))
	    	}
        }
    }
    if(AUC.R2>cutoff) {
        if(variable=="AC50") temp <- MAT.AC50[code,antagonist.assays]
        if(variable=="AC10") temp <- MAT.AC10[code,antagonist.assays]
        if(variable=="ACC") temp <- MAT.ACC[code,antagonist.assays]
        if(variable=="ACB") temp <- MAT.ACB[code,antagonist.assays]
        temp <- temp[!is.na(temp)]
        temp <- temp[!is.nan(temp)]
        temp <- temp[temp>0]
        if(length(temp)>0) {
            temp <- temp[temp<1000000]
	        if(length(temp)>0) {
	            temp <- -log10(temp/1000000)
	            spread <- max(temp)-min(temp)
	            tmedian <- median(temp)
	            if(length(temp)>2) tmad <- mad(temp)
	            else tmad <- 0.5
	            median.antagonist <- 1000000 * 10**(-tmedian)
	            min.antagonist <- 1000000 * 10**(-(tmedian+3*tmad))
	        }
		}
    }
    value <- list(median.value=min(median.agonist,median.antagonist),
    			  min.value=min(min.agonist,min.antagonist),spread=spread)
    return(value)
}
#--------------------------------------------------------------------------------------
#
# calcuate the number of chemicals in activity classes
#
#===============================================================================
# Pathway-specific - start
#===============================================================================
#--------------------------------------------------------------------------------------
allstats <- function() {
	cat("allstats\n")
	flush.console()

    nrange <- 3
    rangemin <- c(0.1,0.01, -1)
    rangemax <- c(  100, 0.1, 0.01)
    results <- as.data.frame(matrix(nrow=(nrange+1),ncol=1+NRECEPTOR))
    rnames <- c("R1 (Agonist)","R2 (Antagonist)")
    for(i in 3:NRECEPTOR) rnames <- c(rnames,paste("R",i,sep=""))
    rnames <- c(rnames,"R1 (Agonist) zcut","R2 (Antagonist) zcut")
    names(results) <- c("AUC Range","R1 (Agonist)","R2 (Antagonist)",rnames[3:(NRECEPTOR)])
    for(i in 1:nrange) {
        results[i,1] <- paste(rangemin[i]," to ",rangemax[i])
        for(j in 1:NRECEPTOR) {
            receptor <- paste("R",j,sep="")
            arindex <- ar.index(receptor)
			nickname <- arindex$nickname
			col <- paste("AUC.",nickname,sep="")
            temp <- SUPERMATRIX[,col]
            temp <- temp[temp>=rangemin[i]]
            temp <- temp[temp<rangemax[i]]
            results[i,j+1] <- length(temp)
        }
    }
    results[4,1]<- "Specific"
    
    i <- nrange+1
    names(results) <- c("Range","Agonist","Antagonist","R3","R4","R5","R6","R7","R8/A16","R9","A1","A2","A3","A4","A5","A6","A7","A8","A9","A10","A11","A12","A13","A14","A15","A17","A18")
    for(j in 1:NRECEPTOR) {
    	aname <- names(results)[j+1]
    	if(aname=="R8/A16") aname <- "R8"
    	temp <- SUPERMATRIX[is.element(SUPERMATRIX[,"maximum.receptor"],aname),]
    	results[4,(j+1)] <- dim(temp)[1]
    }

    print(results)
    outfile <- paste("../output/allchem_ranges_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    write.table(results,file=outfile, row.names=F, append=FALSE, quote=F, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# explore the specificity
#
#--------------------------------------------------------------------------------------
dx.specificity.zt <- function() {
	cat("specificity.zt\n")
	flush.console()

    file <- "../output/specificity_zt.txt"

	temp.agonist <- SUPERMATRIX[SUPERMATRIX[,"AUC.Agonist"]>=SPECIFIC.AUC.CUTOFF,]
	temp.antagonist <- SUPERMATRIX[SUPERMATRIX[,"AUC.Antagonist"]>=SPECIFIC.AUC.CUTOFF,]
    s <- "Receptor\tN\t|T|\t|T|(Agonist)\tp(T)\t|Z|\t|Z|(Agonist)\tp(Z)\t\n"
    cat(s,file=file,append=F)

	for(i in 3:NRECEPTOR) {
		receptor <- paste("R",i,sep="")
		arindex <- ar.index(receptor)
		nickname <- arindex$nickname
		assay.list <- arindex$assay.list
		colname <- paste("AUC.",nickname,sep="")

		temp.receptor <- SUPERMATRIX[SUPERMATRIX[,colname]>=SPECIFIC.AUC.CUTOFF,]
		if(dim(temp.receptor)[1]>=3) {
			nassay <- length(assay.list)
			ac50.list <- assay.list
			T.list <- assay.list
			Z.list <- ac50.list
			for(j in 1:nassay) {
				ac50.list[j] <- paste(ac50.list[j],"_AC50",sep="")
				T.list[j] <- paste(T.list[j],"_T",sep="")
				Z.list[j] <- paste(Z.list[j],"_Zscore",sep="")
			}
			
			cat("\n==================================\n",nickname,":",dim(temp.receptor)[1],"\n==================================\n")
			
		    s <- paste(nickname,"\t",dim(temp.receptor)[1],"\t",sep="")

			x <- temp.receptor[,T.list]
			x <- x[temp.receptor[,ac50.list]<1000000]
			y <- temp.agonist[,T.list]
			y <- y[temp.agonist[,ac50.list]<1000000]	
			if(receptor=="R9") {
				y <- temp.antagonist[,T.list]
				y <- y[temp.antagonist[,ac50.list]<1000000]	
			}
			t.ac50 <- t.test(x=x,y=y,alternative="less")			
			cat("Emax: \t",format(mean(x),digits=2),":",format(mean(y),digits=2),":",format(t.ac50$p.value,digits=2),"\n")
#		    s <- paste(s,format(mean(x),digits=2),"\t",format(mean(y),digits=2),"\t",format(t.ac50$p.value,digits=2),"\t",sep="")
		    s <- paste(s,format(mean(x),digits=2)," (",format(sd(x),digits=2),")","\t",format(mean(y),digits=2)," (",format(sd(y),digits=2),")","\t",format(t.ac50$p.value,digits=2),"\t",sep="")

			x <- temp.receptor[,Z.list]
			x <- x[temp.receptor[,ac50.list]<1000000]
			y <- temp.agonist[,Z.list]
			y <- y[temp.agonist[,ac50.list]<1000000]	
			if(nickname=="R9" || nickname=="A17" || nickname=="A18") {
				y <- temp.antagonist[,Z.list]
				y <- y[temp.antagonist[,ac50.list]<1000000]				
			}
			z.ac50 <- t.test(x=x,y=y,alternative="less")			
			cat("Z: \t",format(mean(x),digits=2),":",format(mean(y),digits=2),":",format(z.ac50$p.value,digits=2),"\n")
		    s <- paste(s,format(mean(x),digits=2)," (",format(sd(x),digits=2),")","\t",format(mean(y),digits=2)," (",format(sd(y),digits=2),")","\t",format(z.ac50$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)

			print(sort(unique(temp.receptor[,"use_category"])))
			cat("\n\n")
			print(sort(unique(temp.receptor[,"structure_category"])))
			cat("\n\n")
			print(sort(unique(temp.receptor[,"target_gene"])))
		}
	}
}
#--------------------------------------------------------------------------------------
#
# explore the specificity based on structure
#
#--------------------------------------------------------------------------------------
dx.structure.specificity.zt <- function(super=F) {
	cat("structure.specificity.ztfiltered\n")
	flush.console()

	col.name <- "maximum.receptor"
	scol.name <- "structure_category"
    file <- "../output/structure_category_specificity_zt.txt"
	if(super) {
		scol.name <- "structure_super_category"
    	file <- "../output/structure_super_category_specificity_zt.txt"
	}
	
    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
    supermatrix <- temp[temp[,"median.T"]>50,]

    txt <- TxT(1,2,3,4)
    s <- paste("Structure_category\tReceptor\t",txt$title,"\n",sep="")
    cat(s,file=file,append=F)
	str.list <- sort(unique(supermatrix[,scol.name]))
	rec.list <- sort(unique(supermatrix[,col.name]))
	nstr <- length(str.list)
	nrec <- length(rec.list)
	for(i in 2:nstr) {
		str.class <- str.list[i]
		for(j in 1:nrec) {
			receptor <- rec.list[j]
			str.mask <- supermatrix[,scol.name]
			str.mask[] <- 0
			str.mask[is.element(supermatrix[,scol.name],str.class)] <- 1
			rec.mask <- supermatrix[,col.name]
			rec.mask[] <- 0
			rec.mask[is.element(supermatrix[,col.name],receptor)] <- 1
			rec.mask[!is.element(supermatrix[,col.name],receptor)] <- -1
			str.mask <- as.numeric(str.mask)
			rec.mask <- as.numeric(rec.mask)
			
			str.mask <- str.mask[rec.mask!=0]
			rec.mask <- rec.mask[rec.mask!=0]
			rec.mask[rec.mask== -1] <- 0
			
			a <- sum(str.mask*rec.mask)
			b <- sum(str.mask*(1-rec.mask))
			c <- sum((1-str.mask)*rec.mask)
			d <- sum((1-str.mask)*(1-rec.mask))
			txt <- TxT(a,b,c,d)
			if(a>=4) {
				s <- paste(str.class,"\t",receptor,"\t",txt$sval,"\n")
		    	cat(s,file=file,append=T)
		    	cat(s)
		    }
		}
	}
}	
#--------------------------------------------------------------------------------------
#
# explore the specificity based on z promiscuity
#
#--------------------------------------------------------------------------------------
dx.pains.ztfiltered <- function() {
	cat("pains.ztfiltered\n")
	flush.console()

	col.name <- "maximum.receptor"
    file <- "../output/pains_ztfiltered.txt"


	temp <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),]
	zlo.ref <- as.numeric(temp[,"promiscuity.lo.Z"])
	zhi.ref <- as.numeric(temp[,"promiscuity.hi.Z"])
	
    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>=3,]
    supermatrix <- temp[temp[,"median.T"]>=50,]

    txt <- TxT(1,2,3,4)
    s <- paste("Receptor\tHIZ.ref.mean\tHIZ.receptor.mean\tp.HIZ\tLO.ref.mean\tLOZ.receptor.mean\tp.LOZ\n",sep="")
    cat(s,file=file,append=F)
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		temp <- supermatrix[is.element(supermatrix[,col.name],receptor),]
		zlo.receptor <- as.numeric(temp[,"promiscuity.lo.Z"])
		zhi.receptor <- as.numeric(temp[,"promiscuity.hi.Z"])
		if(dim(temp)[1]>=5) {
			result <- ks.test(zlo.receptor,zlo.ref,alternative="less",exact=F)
			s <- paste(receptor,"\t",format(mean(zlo.ref),digits=2),"\t",format(mean(zlo.receptor),digits=2),"\t",format(result$p.value,digits=2),"\t",sep="")
			result <- ks.test(zhi.receptor,zhi.ref,alternative="less",exact=F)
			s <- paste(s,format(mean(zhi.ref),digits=2),"\t",format(mean(zhi.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}
}	
#--------------------------------------------------------------------------------------
#
# explore the cross-talk in attagene
#
#--------------------------------------------------------------------------------------
dx.atg.crosstalk <- function() {
	cat("atg.crosstalk\n")
	flush.console()

	col.name <- "maximum.receptor"
    file <- "../output/atg_crosstalk.txt"
	assays <- names(TOXCAST.TESTED)
	atg.assays <- assays[grep("ATG_",assays)]
	cis.assays <- atg.assays[grep("CIS_",atg.assays)]
	trans.assays <- atg.assays[grep("TRANS_",atg.assays)]
	cis.assays <- cis.assays[!is.element(cis.assays,"ATG_ERE_CIS_up")]
	trans.assays <- trans.assays[!is.element(trans.assays,"ATG_ERa_TRANS_up")]

	z.trans <- TOXCAST.ZMAT[CODE.LIST,trans.assays]
	z.cis <- TOXCAST.ZMAT[CODE.LIST,cis.assays]
	z.trans[is.na(z.trans)] <- 0
	z.cis[is.na(z.cis)] <- 0
	z.trans[z.trans<3] <- 0
	z.cis[z.cis<3] <- 0
	z.trans[z.trans>0] <- 1
	z.cis[z.cis>0] <- 1
	hits.trans <- rowSums(z.trans)
	hits.cis <- rowSums(z.cis)

	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.trans)
	names(temp)[dim(temp)[2]] <- "ATG.nonER.hits.HIZ.TRANS"
	temp <- cbind(temp,hits.cis)
	names(temp)[dim(temp)[2]] <- "ATG.nonER.hits.HIZ.CIS"
	SUPERMATRIX <<- temp
	
	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	cis.ref <- as.numeric(hits.cis[ref.codes])
	trans.ref <- as.numeric(hits.trans[ref.codes])
	
	temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
	supermatrix <- temp[temp[,"median.T"]>50,]

    s <- paste("Receptor\tcis.ref.mean\tcis.receptor.mean\tp.cis\ttrans.ref.mean\ttrans.receptor.mean\tp.trans\n",sep="")
    cat(s,file=file,append=F)
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		cis.receptor <- as.numeric(hits.cis[receptor.codes])
		trans.receptor <- as.numeric(hits.trans[receptor.codes])
		if(length(cis.receptor)>=5) {
			result <- ks.test(cis.receptor,cis.ref,alternative="less",exact=F)
			s <- paste(receptor,"\t",format(mean(cis.ref),digits=2),"\t",format(mean(cis.receptor),digits=2),"\t",format(result$p.value,digits=2),"\t",sep="")
			result <- ks.test(trans.receptor,trans.ref,alternative="less",exact=F)
			s <- paste(s,format(mean(trans.ref),digits=2),"\t",format(mean(trans.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}
	outfile <- "../output/superMatrix_ATG.csv"
	write.csv(SUPERMATRIX,file=outfile, row.names=F)
}	
#--------------------------------------------------------------------------------------
#
# explore the cross-talk in novascreen
#
#--------------------------------------------------------------------------------------
dx.nvs.crosstalk <- function() {
	cat("nvs.crosstalk\n")
	flush.console()

	col.name <- "maximum.receptor"
    file <- "../output/nvs_crosstalk.txt"
	assays <- names(TOXCAST.TESTED)
	nvs.assays <- assays[grep("NVS_NR",assays)]
	nvs.assays <- nvs.assays[!is.element(nvs.assays,"NVS_NR_bER")]
	nvs.assays <- nvs.assays[!is.element(nvs.assays,"NVS_NR_hER")]
	nvs.assays <- nvs.assays[!is.element(nvs.assays,"NVS_NR_mERa")]

#	"Tox21_ERa_BLA_Agonist_ratio"	
#	"Tox21_ERa_LUC_BG1_Agonist"	
#	"Tox21_ERa_BLA_Antagonist_ratio"
#	"Tox21_ERa_LUC_BG1_Antagonist"

	z.nvs <- TOXCAST.ZMAT[CODE.LIST,nvs.assays]
	z.nvs[is.na(z.nvs)] <- 0
	z.nvs[z.nvs<3] <- 0
	z.nvs[z.nvs>0] <- 1
	hits.nvs <- rowSums(z.nvs)
	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.nvs)
	names(temp)[dim(temp)[2]] <- "NVS.nonER.hits.HIZ"
	SUPERMATRIX <<- temp
	
	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	nvs.ref <- as.numeric(hits.nvs[ref.codes])
	
	temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
	supermatrix <- temp[temp[,"median.T"]>50,]

    s <- paste("Receptor\tnvs.ref.mean\tnvs.receptor.mean\tp.nvs\n",sep="")
    cat(s,file=file,append=F)
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		nvs.receptor <- as.numeric(hits.nvs[receptor.codes])
		if(length(nvs.receptor)>=5) {
			result <- ks.test(nvs.receptor,nvs.ref,alternative="less",exact=F)
			s <- paste(receptor,"\t",format(mean(nvs.ref),digits=2),"\t",format(mean(nvs.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}
	outfile <- "../output/superMatrix_ATG_NVS.csv"
	write.csv(SUPERMATRIX,file=outfile, row.names=F)
}	
#--------------------------------------------------------------------------------------
#
# explore the cross-talk in tox21
#
#--------------------------------------------------------------------------------------
dx.tox21.crosstalk <- function() {
	cat("tox21.crosstalk\n")
	flush.console()
	temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
	supermatrix <- temp[temp[,"median.T"]>50,]

	col.name <- "maximum.receptor"
    file <- "../output/tox21_crosstalk.txt"
    s <- paste("AssaySet\tReceptor\tTox1.ref.mean\tTox21.receptor.mean\tp.Tox21\n",sep="")
    cat(s,file=file,append=F)
	
	assay.set <- "BLA.Agonist"
	assays <- names(TOXCAST.TESTED)
	tox21.assays <- assays[grep("Tox21",assays)]
	tox21.assays <- tox21.assays[grep("Agonist",tox21.assays)]
	tox21.assays <- tox21.assays[grep("BLA",tox21.assays)]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_BLA_Agonist_ratio")]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_BLA_Antagonist_ratio")]
	z.tox21 <- TOXCAST.ZMAT[CODE.LIST,tox21.assays]
	z.tox21[is.na(z.tox21)] <- 0
	z.tox21[z.tox21<3] <- 0
	z.tox21[z.tox21>0] <- 1
	hits.tox21 <- rowSums(z.tox21)
	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.tox21)
	names(temp)[dim(temp)[2]] <- "Tox21.BLA.Agonist.nonER.hits.HIZ"
	SUPERMATRIX <<- temp

	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	tox21.ref <- as.numeric(hits.tox21[ref.codes])
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		tox21.receptor <- as.numeric(hits.tox21[receptor.codes])
		if(length(tox21.receptor)>=5) {
			result <- ks.test(tox21.receptor,tox21.ref,alternative="less",exact=F)
			s <- paste(assay.set,"\t",receptor,"\t",format(mean(tox21.ref),digits=2),"\t",format(mean(tox21.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}

	assay.set <- "BLA.Antagonist"
	assays <- names(TOXCAST.TESTED)
	tox21.assays <- assays[grep("Tox21",assays)]
	tox21.assays <- tox21.assays[grep("Antagonist",tox21.assays)]
	tox21.assays <- tox21.assays[grep("BLA",tox21.assays)]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_BLA_Agonist_ratio")]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_BLA_Antagonist_ratio")]
	z.tox21 <- TOXCAST.ZMAT[CODE.LIST,tox21.assays]
	z.tox21[is.na(z.tox21)] <- 0
	z.tox21[z.tox21<3] <- 0
	z.tox21[z.tox21>0] <- 1
	hits.tox21 <- rowSums(z.tox21)
	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.tox21)
	names(temp)[dim(temp)[2]] <- "Tox21.BLA.Antagonist.nonER.hits.HIZ"
	SUPERMATRIX <<- temp
	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	tox21.ref <- as.numeric(hits.tox21[ref.codes])
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		tox21.receptor <- as.numeric(hits.tox21[receptor.codes])
		if(length(tox21.receptor)>=5) {
			result <- ks.test(tox21.receptor,tox21.ref,alternative="less",exact=F)
			s <- paste(assay.set,"\t",receptor,"\t",format(mean(tox21.ref),digits=2),"\t",format(mean(tox21.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}

	assay.set <- "LUC.Agonist"
	assays <- names(TOXCAST.TESTED)
	tox21.assays <- assays[grep("Tox21",assays)]
	tox21.assays <- tox21.assays[grep("Agonist",tox21.assays)]
	tox21.assays <- tox21.assays[grep("LUC",tox21.assays)]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_LUC_BG1_Agonist")]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_LUC_BG1_Antagonist")]
	z.tox21 <- TOXCAST.ZMAT[CODE.LIST,tox21.assays]
	z.tox21[is.na(z.tox21)] <- 0
	z.tox21[z.tox21<3] <- 0
	z.tox21[z.tox21>0] <- 1
	hits.tox21 <- rowSums(z.tox21)
	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.tox21)
	names(temp)[dim(temp)[2]] <- "Tox21.LUC.Agonist.nonER.hits.HIZ"
	SUPERMATRIX <<- temp
	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	tox21.ref <- as.numeric(hits.tox21[ref.codes])
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		tox21.receptor <- as.numeric(hits.tox21[receptor.codes])
		if(length(tox21.receptor)>=5) {
			result <- ks.test(tox21.receptor,tox21.ref,alternative="less",exact=F)
			s <- paste(assay.set,"\t",receptor,"\t",format(mean(tox21.ref),digits=2),"\t",format(mean(tox21.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}

	assay.set <- "LUC.Antagonist"
	assays <- names(TOXCAST.TESTED)
	tox21.assays <- assays[grep("Tox21",assays)]
	tox21.assays <- tox21.assays[grep("Antagonist",tox21.assays)]
	tox21.assays <- tox21.assays[grep("LUC",tox21.assays)]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_LUC_BG1_Agonist")]
	tox21.assays <- tox21.assays[!is.element(tox21.assays,"Tox21_ERa_LUC_BG1_Antagonist")]
	z.tox21 <- TOXCAST.ZMAT[CODE.LIST,tox21.assays]
	z.tox21[is.na(z.tox21)] <- 0
	z.tox21[z.tox21<3] <- 0
	z.tox21[z.tox21>0] <- 1
	hits.tox21 <- rowSums(z.tox21)
	temp <- SUPERMATRIX
	temp <- cbind(temp,hits.tox21)
	names(temp)[dim(temp)[2]] <- "Tox21.LUC.Antagonist.nonER.hits.HIZ"
	SUPERMATRIX <<- temp
	ref.codes <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
	tox21.ref <- as.numeric(hits.tox21[ref.codes])
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		receptor.codes <- supermatrix[is.element(supermatrix[,col.name],receptor),"CODE"]
		tox21.receptor <- as.numeric(hits.tox21[receptor.codes])
		if(length(tox21.receptor)>=5) {
			result <- ks.test(tox21.receptor,tox21.ref,alternative="less",exact=F)
			s <- paste(assay.set,"\t",receptor,"\t",format(mean(tox21.ref),digits=2),"\t",format(mean(tox21.receptor),digits=2),"\t",format(result$p.value,digits=2),"\n",sep="")
		    cat(s,file=file,append=T)
		    cat(s)
		}
	}
	outfile <- "../output/superMatrix_ATG_NVS_Tox21.csv"
	write.csv(SUPERMATRIX,file=outfile, row.names=F)
}	
#--------------------------------------------------------------------------------------
#
# explore the flag filtering
#
#--------------------------------------------------------------------------------------
dx.flag.filter <- function() {
	cat("flag.filter\n")
	flush.console()

	col.name <- "maximum.receptor"
	file <- "../output/flag_filter_ztfiltered.txt"
    s <- paste("Assay\tN\tmean.in\tmean.out\tp.value\n",sep="")
    cat(s,file=file,append=F)
    cat(s)

    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
    supermatrix <- temp[temp[,"median.T"]>50,]
	
	for(i in 1:NASSAY) {
		flag.col <- paste(ASSAY.LIST[i],"_flags",sep="")
		aname <- paste("A",i,sep="")
		mask <- supermatrix[,col.name]
		vals.in <- supermatrix[is.element(mask,aname),flag.col]
		vals.out <- supermatrix[is.element(mask,"Agonist"),flag.col]
		mean.in <- mean(vals.in)
		mean.out <- mean(vals.out)
		
		n.in <- length(vals.in)		
		if(n.in>=5) {
			result <- ks.test(vals.in,vals.out,alternative="less",exact=F)
			p.val <- result$p.value
			s <- paste(aname,"\t",n.in,"\t",format(mean.in,digits=2),"\t",format(mean.out,digits=2),"\t",format(p.val,digits=2),"\n",sep="")
			cat(s,file=file,append=T)
		    cat(s)
		}
	}
}		
#--------------------------------------------------------------------------------------
#
# explore the specificity based on physchem properties
#
#--------------------------------------------------------------------------------------
dx.physchem.specificity.ztfiltered <- function() {
	cat("physchem.specificity.ztfiltered\n")
	flush.console()

	col.name <- "maximum.receptor"
	file <- "../output/physchem_specificity_ztfiltered.txt"

    filename <- "../input/ToxCast_physchem_QP_Chembl_electrophil_DFT.csv"
    physchem <- read.csv(file=filename,stringsAsFactors=F)
    
    temp <- physchem[,"species"]
    temp2 <- temp
    temp2[] <- 0
    temp2 <- as.numeric(temp2)
    temp2[] <- NA
    temp2[is.element(temp,"NEUTRAL")] <- 0
    temp2[is.element(temp,"ACID")] <- 1
    temp2[is.element(temp,"BASE")] <- 1
    physchem <- cbind(physchem,temp2)
    names(physchem)[dim(physchem)[2]] <- "Charged"
    

    s <- paste("Variable\tReceptor\tN.in\tN.out\tnorm.in\tnorm.out\tp.value\n")
    cat(s,file=file,append=F)
    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>=3,]
    supermatrix <- temp[temp[,"median.T"]>=50,]
    #PHYSCHEM <<- physchem
	rownames(physchem) <- physchem[,"CODE"]
	#physchem <- physchem[row.names(supermatrix),]
	pnames <- names(physchem)[10:dim(physchem)[2]]
	nparam <- length(pnames)
	pclass <- pnames
	pclass[] <- "numeric"
	pclass[38] <- "character"
	pclass[39] <- "character"
    
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		rec.mask <- supermatrix[,col.name]
		rec.mask[] <- 0
		rec.mask[is.element(supermatrix[,col.name],receptor)] <- 1
		codes.in <- supermatrix[rec.mask==1,"CODE"]
		codes.out <- SUPERMATRIX[is.element(SUPERMATRIX[,col.name],"None"),"CODE"]
		for(j in 1:nparam) {
			if(pclass[j]=="numeric" && length(codes.in)>=5) {
				param <- pnames[j]
				y.in <- physchem[codes.in,param]
				y.out <- physchem[codes.out,param]
				y.in <- y.in[!is.na(y.in)]
				y.out <- y.out[!is.na(y.out)]
				
				n.in <- length(y.in)
				n.out <- length(y.out)
				mean.in <- mean(y.in)
				mean.out <- mean(y.out)
				p.val <- 1
				if(!is.na(mean.in)) {
					if(!is.na(mean.out)) {
						if(n.in>=5 && n.out>=5) {
							res <- t.test(y.in,y.out)
							cat(param," : ",receptor,"\n")
							print(res)
							p.val <- res$p.value
						}
					}
				}
				if(n.in>=5) {
					s <- paste(param,"\t",receptor,"\t",n.in,"\t",n.out,"\t",format(mean.in,digits=2),"\t",format(mean.out,digits=2),"\t",format(p.val,digits=2),"\n",sep="")
					cat(s,file=file,append=T)
		    		cat(s)		
		    	}
			}
		}
	}
	physchem <- physchem[CODE.LIST,10:dim(physchem)[2]]
	temp <- cbind(SUPERMATRIX,physchem)
	SUPERMATRIX <<- temp
	outfile <- "../output/superMatrix_ATG_NVS_Tox21_physchem.csv"
	write.csv(SUPERMATRIX,file=outfile, row.names=F)
}		
#--------------------------------------------------------------------------------------
#
# physchem specificity using PCA
#
#--------------------------------------------------------------------------------------
physchem.pca <- function() {
	cat("physchem.pca\n")
	flush.console()

	col.name <- "maximum.receptor"

    filename <- "../input/ToxCast_physchem_QP_Chembl_electrophil_DFT.csv"
    physchem <- read.csv(file=filename,stringsAsFactors=F)
    temp <- physchem[,"species"]
    temp2 <- temp
    temp2[] <- 0
    temp2 <- as.numeric(temp2)
    temp2[] <- NA
    temp2[is.element(temp,"NEUTRAL")] <- 0
    temp2[is.element(temp,"ACID")] <- 1
    temp2[is.element(temp,"BASE")] <- 1
    physchem <- cbind(physchem,temp2)
    names(physchem)[dim(physchem)[2]] <- "Charged"
    rownames(physchem) <- physchem[,"CODE"]
    ncol <- dim(physchem)[2]
    mask <- vector(length=ncol,mode="integer")
    mask[] <- 1
    mask[1:6] <- 0
    mask[47:48] <- 0
    physchem <- physchem[,mask==1]

    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
    supermatrix <- temp[temp[,"median.T"]>50,]
    
	pnames <- names(physchem)
	nparam <- length(pnames)
	physchem <- physchem[row.names(supermatrix),]
    cat("dim(physchem): ",dim(physchem),"\n")
    physchem[is.na(physchem)] <- 1000000
	cs <- floor(colSums(physchem)/1000000)
	cs[cs>5] <- 0
	physchem <- physchem[,cs>0]
	
    mask <- rowSums(physchem)
    mask[mask<1000000] <- 1
    mask[mask>=1000000] <- 0
    print(sum(mask))
    physchem <- physchem[mask==1,]
    supermatrix <- supermatrix[mask==1,]    
    cat("dim(physchem): ",dim(physchem),"\n")    

	rnames <- supermatrix[,col.name]
	rnames[is.element(rnames,"A13A17")] <- "A13"
	rnames[is.element(rnames,"A15R7")] <- "A15"
	rnames[is.element(rnames,"A18R8R9")] <- "A18"
	rnames[is.element(rnames,"A18R9")] <- "A1"
	rnames[is.element(rnames,"A3A17A3R2A5A12")] <- "A3"
	rnames[is.element(rnames,"R1R6")] <- "Agonist"
	rnames[is.element(rnames,"R1R8")] <- "Agonist"
	rnames[is.element(rnames,"R3R1")] <- "Agonist"
	rnames[is.element(rnames,"R6A12")] <- "Agonist"
	rnames[is.element(rnames,"A3R12")] <- "A3"
	rnames[is.element(rnames,"A5R12")] <- "A5"
	rnames[is.element(rnames,"A5A12")] <- "A5"
	rnames[is.element(rnames,"A3A17")] <- "A3"
	rnames[is.element(rnames,"A3R2")] <- "A3"

	rec.list <- sort(unique(rnames))
	print(rec.list)
	nrec <- length(rec.list)

	result <- prcomp(physchem,scale=T)
	rnames.2 <- rnames
	rnames.2[] <- "Other"
	rnames.2[is.element(rnames,"Agonist")] <- "Agonist"
	rnames.2[is.element(rnames,"Antagonist")] <- "Antagonist"
	
	pca2d(result,group=rnames.2,components=c(1,2))
	#file <- "../output/physchem_specificity_ztfiltered.txt"
   # s <- paste("Variable\tReceptor\tN.in\tN.out\tnorm.in\tnorm.out\tp.value\n")
   # cat(s,file=file,append=F)

    browser()

}		
#--------------------------------------------------------------------------------------
#
# prepare the literature DB
#
#--------------------------------------------------------------------------------------
prep.literature <- function() {
	cat("prep.literature\n")
	flush.console()

    filename <- "../input/literature/CASRN_index_summary.csv"
    index <- read.csv(file=filename,stringsAsFactors=F)

    filename <- "../input/literature/Literature_consensus_summary.csv"
    counter <- read.csv(file=filename,stringsAsFactors=F)
    
    cnames <- c("CODE","CASRN","Name","CERAPP_ID","Lit.N.Binding","Lit.N.Agonist","Lit.N.Antagonist","Lit.Hit.Binding","Lit.Hit.Agonist","Lit.Hit.Antagonist")
    
    lit <- as.data.frame(matrix(nrow=NCHEM,ncol=length(cnames)))
    names(lit) <- cnames
    lit[,"CODE"] <- CHEMS[,"CODE"]
    lit[,"CASRN"] <- CHEMS[,"CASRN"]
    lit[,"Name"] <- CHEMS[,"Name"]
    rownames(lit) <- lit[,"CODE"]
    lit[,"CERAPP_ID"] <- NA
    lit[,5:10] <- 0
    
    for(i in 1:NCHEM) {
    	casrn <- lit[i,"CASRN"]
    	temp <- index[is.element(index[,"CASRN"],casrn),]
    	if(dim(temp)[1]>0) {
    		cid <- temp[1,"CERAPP_ID"]
    		lit[i,"CERAPP_ID"] <- cid
    		temp2 <- counter[is.element(counter[,"CERAPP_ID"],cid),]
    		if(dim(temp2)[1]>0) {
    			for(j in 1:dim(temp2)[1]) {
    				mode <- temp2[j,"Mode"]
    				count <- temp2[j,"Count"]
    				hit <- temp2[j,"ratio_Thr_0.2"]
    				if(mode=="Binding") {
    					lit[i,"Lit.N.Binding"] <- count
    					lit[i,"Lit.Hit.Binding"] <- hit
    				}
    				else if(mode=="Agonist") {
    					lit[i,"Lit.N.Agonist"] <- count
    					lit[i,"Lit.Hit.Agonist"] <- hit
    				}
    				else if(mode=="Antagonist") {
    					lit[i,"Lit.N.Antagonist"] <- count
    					lit[i,"Lit.Hit.Antagonist"] <- hit
    				}
    			}
    		}
    	}
    }

    LITERATURE <<- lit
    outfile <- "../input/literature/ER_literature_summary.csv"
    write.csv(lit,file=outfile, row.names=F)

}		
#--------------------------------------------------------------------------------------
#
# compare with the literature
#
#--------------------------------------------------------------------------------------
comp.lit <- function(nmin=4) {
	cat("comp.lit\n")
	flush.console()

    file <- paste("../output/comp_literature_min_reports_",nmin,".txt",sep="")
    s <- "ChemicalSets\tReceptor\tNBindingPos\tNBindingNeg\tNAgonistPos\tNAgonistNeg\tNAntagonistPos\tNAntagonistNeg\tTotal\tMinimumReport\n"
    cat(s,file=file,append=F)
	col.name <- "maximum.receptor"


	chemicalSet <- "Initial"
    supermatrix <- SUPERMATRIX
    supermatrix[is.na(supermatrix[,"Lit.N.Binding"]),"Lit.N.Binding"] <- 0
    supermatrix[is.na(supermatrix[,"Lit.N.Agonist"]),"Lit.N.Agonist"] <- 0
    supermatrix[is.na(supermatrix[,"Lit.N.Antagonist"]),"Lit.N.Antagonist"] <- 0
    
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		temp <- supermatrix[is.element(supermatrix[,col.name],receptor),]
		temp.binding <- temp[temp[,"Lit.N.Binding"]>=nmin,"Lit.Hit.Binding"]
		temp.binding <- temp.binding[!is.na(temp.binding)]
		n.binding <- length(temp.binding)
		pos.binding <- sum(temp.binding)
		neg.binding <- n.binding-pos.binding

		temp.agonist <- temp[temp[,"Lit.N.Agonist"]>=nmin,"Lit.Hit.Agonist"]
		n.agonist <- length(temp.agonist)
		pos.agonist <- sum(temp.agonist)
		neg.agonist <- n.agonist-pos.agonist

		temp.antagonist <- temp[temp[,"Lit.N.Antagonist"]>=nmin,"Lit.Hit.Antagonist"]
		n.antagonist <- length(temp.antagonist)
		pos.antagonist <- sum(temp.antagonist)
		neg.antagonist <- n.antagonist-pos.antagonist

		ntotal <- n.binding+n.agonist+n.antagonist
	    if(ntotal>0) {
	    	s <- paste(chemicalSet,"\t",receptor,"\t",pos.binding,"\t",neg.binding,"\t",pos.agonist,"\t",neg.agonist,"\t",pos.antagonist,"\t",neg.antagonist,"\t",ntotal,"\t",nmin,"\n",sep="")
	    	cat(s,file=file,append=T)
			cat(s)
		}
	}

	chemicalSet <- "Final"
    temp <- SUPERMATRIX[SUPERMATRIX[,"median.Z"]>3,]
    supermatrix <- temp[temp[,"median.T"]>50,]
    supermatrix[is.na(supermatrix[,"Lit.N.Binding"]),"Lit.N.Binding"] <- 0
    supermatrix[is.na(supermatrix[,"Lit.N.Agonist"]),"Lit.N.Agonist"] <- 0
    supermatrix[is.na(supermatrix[,"Lit.N.Antagonist"]),"Lit.N.Antagonist"] <- 0
    
	rec.list <- sort(unique(supermatrix[,col.name]))
	nrec <- length(rec.list)
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		temp <- supermatrix[is.element(supermatrix[,col.name],receptor),]
		temp.binding <- temp[temp[,"Lit.N.Binding"]>=nmin,"Lit.Hit.Binding"]
		temp.binding <- temp.binding[!is.na(temp.binding)]
		n.binding <- length(temp.binding)
		pos.binding <- sum(temp.binding)
		neg.binding <- n.binding-pos.binding

		temp.agonist <- temp[temp[,"Lit.N.Agonist"]>=nmin,"Lit.Hit.Agonist"]
		n.agonist <- length(temp.agonist)
		pos.agonist <- sum(temp.agonist)
		neg.agonist <- n.agonist-pos.agonist

		temp.antagonist <- temp[temp[,"Lit.N.Antagonist"]>=nmin,"Lit.Hit.Antagonist"]
		n.antagonist <- length(temp.antagonist)
		pos.antagonist <- sum(temp.antagonist)
		neg.antagonist <- n.antagonist-pos.antagonist

		ntotal <- n.binding+n.agonist+n.antagonist
	    if(ntotal>0) {
			s <- paste(chemicalSet,"\t",receptor,"\t",pos.binding,"\t",neg.binding,"\t",pos.agonist,"\t",neg.agonist,"\t",pos.antagonist,"\t",neg.antagonist,"\t",ntotal,"\t",nmin,"\n",sep="")
		    cat(s,file=file,append=T)
			cat(s)
		}
	}
}
#--------------------------------------------------------------------------------------
#
# print out the receptor tree
#
#--------------------------------------------------------------------------------------
receptor.tree <- function(cutoff) {
	cat("receptor.tree\n")
	flush.console()

	n0 <- dim(SUPERMATRIX)[1]
	cat("======================================\n")
	cat(">>> Ntotal: ",n0,"\n")
	
	temp1 <- SUPERMATRIX[!is.element(SUPERMATRIX[,"maximum.receptor"],"None"),]
	n1 <- dim(temp1)[1]
	cat("======================================\n")
	cat(">>> N1 (actives):     ",n1,"\n")
	cat(">>> N0-N1 (inactive): ",(n0-n1),"\n")
	
	temp2 <- temp1[temp1[,"median.T"]>3,]
	n2 <- dim(temp2)[1]
	cat("======================================\n")
	cat(">>> N2 (high T):   ",n2,"\n")
	cat(">>> N1-N2 (low T): ",(n1-n2),"\n")

	temp3 <- temp2[temp2[,"median.Z"]>3,]
	n3 <- dim(temp3)[1]
	cat("======================================\n")
	cat(">>> N3 (high Z):   ",n3,"\n")
	cat(">>> N2-N3 (low Z): ",(n2-n3),"\n")
	
	cat("======================================\n")
	rec.list <- sort(unique(temp3[,"maximum.receptor"]))
	nrec <- length(rec.list)
	rcount <- vector(length=nrec,mode="integer")
	for(i in 1:nrec) {
		receptor <- rec.list[i]
		temp4 <- temp3[is.element(temp3[,"maximum.receptor"],receptor),]
		rcount[i] <- dim(temp4)[1]
	}
	ix <- sort(rcount,index.return=T,decreasing=T)$ix
	for(i in 1:nrec) {
		index <- ix[i]
		if(rcount[index]>=5) cat(rec.list[index]," \t ",rcount[index],"\n")
	}
}	
#--------------------------------------------------------------------------------------
#
# calvulate a Hill function
#
#--------------------------------------------------------------------------------------
hill <- function(x,mid,wid,bot=0,top=1) {
	rng <- top-bot
	val <- bot + rng*(x**wid/(x**wid + mid**wid))
	return(val)
}
#--------------------------------------------------------------------------------------
#
# calculate a sigmoid function
#
#--------------------------------------------------------------------------------------
sigmoid <- function(x,mid,wid) {
	val <- 1 / (1+exp(-(x-mid)/wid))
	return(val)
}
#--------------------------------------------------------------------------------------
#
# give various identifiers for each receptor
#
#===============================================================================
# Pathway-specific - start
#===============================================================================
#--------------------------------------------------------------------------------------
ar.index <- function(receptor) {

	al <- 
	c("NVS_NR_bER",	
	"NVS_NR_hER",	
	"NVS_NR_mERa",	
	"OT_ER_ERaERa_0480",	
	"OT_ER_ERaERa_1440",	
	"OT_ER_ERaERb_0480",	
	"OT_ER_ERaERb_1440",	
	"OT_ER_ERbERb_0480",	
	"OT_ER_ERbERb_1440",	
	"OT_ERa_EREGFP_0120",	
	"OT_ERa_EREGFP_0480",	
	"ATG_ERa_TRANS_up",	
	"ATG_ERE_CIS_up",	
	"Tox21_ERa_BLA_Agonist_ratio",	
	"Tox21_ERa_LUC_BG1_Agonist",	
	"ACEA_T47D_80hr_Positive",	
	"Tox21_ERa_BLA_Antagonist_ratio",
	"Tox21_ERa_LUC_BG1_Antagonist")


	nickname <- receptor
	if(receptor=="R1") {
		nickname <- "Agonist"
		assay.list <- c(al[1],al[2],al[3],al[4],al[5],al[6],al[7],al[8],al[9],al[10],al[11],al[12],al[13],al[14],al[15],al[16])
	}
	if(receptor=="R2") {
		nickname <- "Antagonist"
		assay.list <- c(al[1],al[2],al[3],al[4],al[5],al[6],al[7],al[8],al[9],al[10],al[11],al[17],al[18])
	}
	if(receptor=="R3") {
		assay.list <- c(al[1],al[2],al[3])
	}
	if(receptor=="R4") {
		assay.list <- c(al[4],al[5],al[6],al[7],al[8],al[9])
	}
	if(receptor=="R5") {
		assay.list <- c(al[10],al[11])
	}
	if(receptor=="R6") {
		assay.list <- c(al[12],al[13])
	}
	if(receptor=="R7") {
		assay.list <- c(al[14],al[15])
	}
	if(receptor=="R8") {
		assay.list <- c(al[16])
	}
	if(receptor=="R9") {
		assay.list <- c(al[17],al[18])
	}
	if(receptor=="R10") {
		nickname <- "A1"
		assay.list <- c(al[1])
	}
	if(receptor=="R11") {
		nickname <- "A2"
		assay.list <- c(al[2])
	}
	if(receptor=="R12") {
		nickname <- "A3"
		assay.list <- c(al[3])
	}
	if(receptor=="R13") {
		nickname <- "A4"
		assay.list <- c(al[4])
	}
	if(receptor=="R14") {
		nickname <- "A5"
		assay.list <- c(al[5])
	}
	if(receptor=="R15") {
		nickname <- "A6"
		assay.list <- c(al[6])
	}
	if(receptor=="R16") {
		nickname <- "A7"
		assay.list <- c(al[7])
	}
	if(receptor=="R17") {
		nickname <- "A8"
		assay.list <- c(al[8])
	}
	if(receptor=="R18") {
		nickname <- "A9"
		assay.list <- c(al[9])
	}
	if(receptor=="R19") {
		nickname <- "A10"
		assay.list <- c(al[10])
	}
	if(receptor=="R20") {
		nickname <- "A11"
		assay.list <- c(al[11])
	}
	if(receptor=="R21") {
		nickname <- "A12"
		assay.list <- c(al[12])
	}
	if(receptor=="R22") {
		nickname <- "A13"
		assay.list <- c(al[13])
	}
	if(receptor=="R23") {
		nickname <- "A14"
		assay.list <- c(al[14])
	}
	if(receptor=="R24") {
		nickname <- "A15"
		assay.list <- c(al[15])
	}
	if(receptor=="R25") {
		nickname <- "A17"
		assay.list <- c(al[17])
	}
	if(receptor=="R26") {
		nickname <- "A18"
		assay.list <- c(al[18])
	}
	return(list(nickname=nickname,assay.list=assay.list))
}
#--------------------------------------------------------------------------------------
#
# compare the old and new R1 values - only for ER pathway
#
#--------------------------------------------------------------------------------------
comp.old.new <- function(to.file=F) {
	cat("comp.old.new\n")
	flush.console()
    filename <- "../output/superMatrix.csv"
    new.data <- read.csv(file=filename)
    filename <- "../input/ERModel_summary_all_toxminer_v20.txt"
    old.data <- read.table(filename,sep="\t",header=T,stringsAsFactors=F,comment.char="",quote="")

    rownames(new.data) <- new.data[,"CODE"]
    rownames(old.data) <- old.data[,"CODE"]
    code.list <- rownames(new.data)
    code.list <- code.list[is.element(code.list,rownames(old.data))]
    new.data <- new.data[code.list,]
    old.data <- old.data[code.list,]

    if(to.file) {
        fname <- paste("../plots/ER_model_old_new.pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(1,1),mar=c(4,4,3,2))

    x <- old.data[,"R1"]
    y <- new.data[,"AUC.Agonist"]
    xlab <- "Old Agonist AUC"
    ylab <- "New Agonist AUC"
    plot(y~x,xlab=xlab,ylab=ylab,main="Agonist",cex.lab=1.2,cex.axis=1.2,xlim=c(0,1),ylim=c(0,1))
    lines(c(0,1),c(0,1))

    for(i in 1:length(x)) {
        if(abs(x[i]-y[i])>0.1) {
            code <- code.list[i]
            cname <- CHEMS[code,"Name"]
            cat("agonist: ",cname,"  old: ",format(x[i],digits=2)," new: ",format(y[i],digits=2),"\n")
            text(x[i],y[i],cname,pos=4)
        }
    }

    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# plot the ACU vs. rotroff - only for ER pathway
#
#--------------------------------------------------------------------------------------
AUC.vs.rotroff <- function(to.file=F,mode="ref") {
   	file <- paste("../output/",mode,"chem_AUC_",PENALTY.METHOD,"_",ALPHA,".txt",sep="")
    auc <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
	rownames(auc)<- auc[,"CODE"]

    if(to.file) {
        fname <- paste("../plots/auc_vs_rotroff_",mode,".pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(2,1),mar=c(4,6,1,3))

	xag <- auc[,"R1"]
	xant <- auc[,"R2"]
	yrag <- auc[,"Rotroff_COMPOSITE_SCORE_SCALED_AGONIST"]/100
	yrant <- auc[,"Rotroff_COMPOSITE_SCORE_SCALED_ANTAGONIST"]/100

	cutoff <- 1e-3
	xag <- xag[!is.na(yrag)]
	xant <- xant[!is.na(yrag)]
	yrant <- yrant[!is.na(yrag)]
	yrag <- yrag[!is.na(yrag)]

	xag[xag<cutoff] <- cutoff
	xant[xant<cutoff] <- cutoff
	yrant[yrant<cutoff] <- cutoff
	yrag[yrag<cutoff] <- cutoff

    plot(yrag~xag,xlab="AUC(Agonist)",ylab="Rotroff Agonist",xlim=c(cutoff,1),ylim=c(0,1),log="x",cex.lab=1.2, cex.axis=1.2,type="n",xaxp=c(0.0001,1,1))#,yaxp=c(0.0001,1,1))
   	points(yrag~xag,pch=21,bg="black",cex=1)
    lines(c(1e-5,1e-5),c(cutoff,1),col="gray")
    lines(c(1e-4,1e-4),c(cutoff,1),col="gray")
    lines(c(1e-3,1e-3),c(cutoff,1),col="gray")
    lines(c(1e-2,1e-2),c(cutoff,1),col="gray")
    lines(c(1e-1,1e-1),c(cutoff,1),col="gray")
    lines(c(1,1),c(cutoff,1),col="gray")
	text(cutoff,0.9,pos=4,"Agonist")

   	plot(yrant~xant,xlab="AUC(Antagonist)",ylab="Rotroff Antagonist",xlim=c(cutoff,1),ylim=c(0,1),log="x",cex.lab=1.2, cex.axis=1.2,type="n",xaxp=c(0.0001,1,1))#,yaxp=c(0.0001,1,1))
   	points(yrant~xant,pch=21,bg="black",cex=1)
    lines(c(1e-5,1e-5),c(cutoff,1),col="gray")
    lines(c(1e-4,1e-4),c(cutoff,1),col="gray")
    lines(c(1e-3,1e-3),c(cutoff,1),col="gray")
    lines(c(1e-2,1e-2),c(cutoff,1),col="gray")
    lines(c(1e-1,1e-1),c(cutoff,1),col="gray")
    lines(c(1,1),c(cutoff,1),col="gray")
	text(cutoff,0.9,pos=4,"Antagonist")


    if(to.file) dev.off()
    else browser()

}
#--------------------------------------------------------------------------------------
#
# plot the pseudo-AC50 vs. AUC
#
#--------------------------------------------------------------------------------------
pseudoAC50.AUC <- function(to.file=F,cutpoint=SPECIFIC.AUC.CUTOFF) {
	cat("pseudoAC50.AUC\n")
	flush.console()

    if(to.file) {
        fname <- paste("../plots/pseudoAC50_vs_AUC.pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(1,1),mar=c(4,4,3,2))

    x1 <- SUPERMATRIX[,"AUC.Agonist"]
    x2 <- SUPERMATRIX[,"AUC.Antagonist"]
    x <- rowMax(cbind(x1,x2))
    y <- SUPERMATRIX[,"pseudo.AC50.median"]
    y <- SUPERMATRIX[,"pseudo.AC50.min"] 
    
    col.auc.1 <- which.max(is.element(names(SUPERMATRIX),"AUC.Agonist"))
    col.auc.2 <- which.max(is.element(names(SUPERMATRIX),"AUC.A18"))
    auc.max <- rowMax(SUPERMATRIX[,col.auc.1:col.auc.2])
    cnames <- SUPERMATRIX[,"Name"]
    x <- x[!is.na(y)]
    x1 <- x1[!is.na(y)]
    cnames <- cnames[!is.na(y)]
    code.list <- CODE.LIST[!is.na(y)]
    auc.max <- auc.max[!is.na(y)]
    y <- y[!is.na(y)]
    
    ylab <- "max[AUC(agonist), AUC(antagonist)]"
    xlab <- "min-AC50 (uM)"
    plot(x~y,xlab=xlab,ylab=ylab,main="",cex.lab=1.2,cex.axis=1.2,ylim=c(0,1.0),xlim=c(01e-9,2e2),log="x",xaxp=c(0.000000001,1,1))
    lines(c(1e-10,1e6),c(cutpoint,cutpoint))
    lines(c(100,100),c(0,1.5))
	
	xlm <- x
	ylm <- y
	ylm <- ylm[x>0.1]
	xlm <- xlm[x>0.1]
	ylm <- log10(ylm)
	result <- lm(ylm~xlm)
	slope <- result[[1]][2]
	intercept <- result[[1]][1]
	
	x0 <- c(0,1.2)
	y0 <- x0
	y0[1] <- intercept
	y0[2] <- intercept + x0[2]*slope
	y0[1] <- 10**y0[1]
	y0[2] <- 10**y0[2]
	lines(c(y0[1],y0[2]),c(x0[1],x0[2]),lwd=2,lty="dashed")
	ilabel <- NULL
	clabel <- NULL
	counter <- 0
	rownames(REFCHEMS) <- REFCHEMS[,"CODE"]
	for(i in 1:length(x)){
		if(auc.max[i]>cutpoint) points(y[i],x[i],pch=21,bg="black")
		else points(y[i],x[i],pch=21,bg="gray")
	}
	for(i in 1:length(x)){
		code <- code.list[i]
		if(is.element(code,REFCHEMS[,"CODE"])) {
			if(is.element(REFCHEMS[code,"ORDER"],c(1,2,3,4))) points(y[i],x[i],pch=24,bg="green",cex=1.5)
			else if(is.element(REFCHEMS[code,"ORDER"],c(6))) points(y[i],x[i],pch=25,bg="green",cex=1.5)
			else points(y[i],x[i],pch=23,bg="red",cex=1.5)
		}
	}	
	delta <- 0.03
	for(i in 1:length(x)){
		doit <- T
		code <- code.list[i]
		if(is.element(code,REFCHEMS[,"CODE"])) doit <- F
		if(x[i] < 0.45) doit <- F
		if(doit) {
			counter <- counter+1		
			ilabel <- c(ilabel,paste("[",counter,"]",sep=""))
			clabel <- cnames[i]
			if(clabel=="2,2-Bis(4-hydroxyphenyl)-1,1,1-trichloroethane") clabel <- "HPTE"
			text(y[i],x[i],ilabel[length(ilabel)],pos=3,cex=0.8)
			text(0.05,1.-counter*delta,paste(ilabel[length(ilabel)],clabel),pos=4,cex=0.8)
		}
	}

	counter0 <- counter
	for(i in 1:length(x)){
		doit <- F
		if(y[i]<3) {
			cr.mat <- vector(length=length(CONCLIST),mode="numeric")
			cr.mat[] <- 0
			for(j in 1:length(CONCLIST)) {
				conc <- CONCLIST[j]
				cr.mat[j] <- (conc/(conc+y[i]))
			}
        	auc.ideal <- receptor.score(cr.mat,do.print=F)-0.2
			if(x[i]<auc.ideal*0.5 && x[i]>cutpoint && y[i]<1 && y[i]>0.1) doit <- T
		}
		doit <- F
		if(y[i]<0.001 && x[i]<0.3 && x[i]>0.1) doit <- T
		if(doit) {
			counter <- counter+1		
			ilabel <- c(ilabel,paste("[",counter,"]",sep=""))
			clabel <- cnames[i]
			if(clabel=="2,2-Bis(4-hydroxyphenyl)-1,1,1-trichloroethane") clabel <- "HPTE"
			text(y[i],x[i],ilabel[length(ilabel)],pos=4,cex=0.8)
			points(y[i],x[i],pch=21,bg="cyan",cex=1.5)
			text(0.7e-9,0.41-(counter-counter0)*delta,paste(ilabel[length(ilabel)],clabel),pos=4,cex=0.8)
			
		}
	}

    ac50s <- CONCLIST
	nac50 <- length(ac50s)
	aucs <- ac50s
	aucs[] <- 0
    for(i in 1:nac50) {
        ac50 <- ac50s[i]
		top <- 1.
		w <- 1
		cr.mat <- vector(length=length(CONCLIST),mode="numeric")
		cr.mat[] <- 0
		for(j in 1:length(CONCLIST)) {
			conc <- CONCLIST[j]
			cr.mat[j] <- top*(conc**w/(conc**w+ac50**w))
		}
        aucs[i] <- receptor.score(cr.mat,do.print=F) - 0.2
        #cat("ac50,auc,",ac50s[i],":",format(aucs[i],digits=2),"\n")
    }
    
	#lines(aucs~ac50s,lwd=3)
	x0 <- 0.55
	x1 <- 0.15
	#arrows(x0=x0,y0=0.5,x1=x1,y1=0.5,col="red",lwd=3,length=0.1)
	xshift <- x0-x1
	#text(0.6,0.5,paste("shift=",format(xshift,digits=2),"log units"),pos=4)
    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# plot the pseudo-AC50 / AUC shift vs. the spread
#
#--------------------------------------------------------------------------------------
pseudoAC50.spread <- function(to.file=F,cutpoint=SPECIFIC.AUC.CUTOFF) {
	cat("pseudoAC50.spread\n")
	flush.console()

    if(to.file) {
        fname <- paste("../plots/pseudoAC50AUCspread.pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(1,1),mar=c(4,4,3,2))

    x1 <- SUPERMATRIX[,"AUC.Agonist"]
    x2 <- SUPERMATRIX[,"AUC.Antagonist"]
    x <- rowMax(cbind(x1,x2))
    y <- SUPERMATRIX[,"pseudo.AC50.median"]
    z <- SUPERMATRIX[,"AC50.spread"]
    
    col.auc.1 <- which.max(is.element(names(SUPERMATRIX),"AUC.Agonist"))
    col.auc.2 <- which.max(is.element(names(SUPERMATRIX),"AUC.A18"))
    auc.max <- rowMax(SUPERMATRIX[,col.auc.1:col.auc.2])
    cnames <- SUPERMATRIX[,"Name"]
    
    
    
    x <- x[!is.na(y)]
    z <- z[!is.na(y)]
    cnames <- cnames[!is.na(y)]
    code.list <- CODE.LIST[!is.na(y)]
    auc.max <- auc.max[!is.na(y)]
    y <- y[!is.na(y)]
	y <- -log10(y)    
	diff <- x-y
	
    xlab <- "log(AC50) spread"
    ylab <- "AC50-AUC diff"
    plot(diff~z,xlab=xlab,ylab=ylab,main="",cex.lab=1.2,cex.axis=1.2,ylim=c(-5,5),xlim=c(0,7))
    
    browser()
    lines(c(1e-6,1e6),c(cutpoint,cutpoint))
    lines(c(100,100),c(0,1.5))
	
	xlm <- x
	ylm <- y
	ylm <- ylm[x>0.1]
	xlm <- xlm[x>0.1]
	ylm <- log10(ylm)
	result <- lm(ylm~xlm)
	slope <- result[[1]][2]
	intercept <- result[[1]][1]
	
	x0 <- c(0,1.2)
	y0 <- x0
	y0[1] <- intercept
	y0[2] <- intercept + x0[2]*slope
	y0[1] <- 10**y0[1]
	y0[2] <- 10**y0[2]
	lines(c(y0[1],y0[2]),c(x0[1],x0[2]),lwd=2,lty="dashed")
	ilabel <- NULL
	clabel <- NULL
	counter <- 0
	rownames(REFCHEMS) <- REFCHEMS[,"CODE"]
	for(i in 1:length(x)){
		if(auc.max[i]>0.2) points(y[i],x[i],pch=21,bg="black")
		else points(y[i],x[i],pch=21,bg="gray")
	}
	for(i in 1:length(x)){
		code <- code.list[i]
		if(is.element(code,REFCHEMS[,"CODE"])) {
			if(is.element(REFCHEMS[code,"ORDER"],c(1,2,3,4))) points(y[i],x[i],pch=24,bg="green",cex=1.5)
			else if(is.element(REFCHEMS[code,"ORDER"],c(6))) points(y[i],x[i],pch=25,bg="green",cex=1.5)
			else points(y[i],x[i],pch=23,bg="red",cex=1.5)
		}
	}	
	for(i in 1:length(x)){
		doit <- T
		code <- code.list[i]
		if(is.element(code,REFCHEMS[,"CODE"])) doit <- F
		if(x[i] < 0.45) doit <- F
		if(doit) {
			counter <- counter+1		
			ilabel <- c(ilabel,paste("[",counter,"]",sep=""))
			clabel <- c(clabel,cnames[i])
			text(y[i],x[i],ilabel[length(ilabel)],pos=3,cex=0.8)
			text(0.02,1.25-counter*0.04,paste(ilabel[length(ilabel)],cnames[i]),pos=4,cex=0.8)

		}
	}

	counter0 <- counter
	for(i in 1:length(x)){
		doit <- F
		if(y[i]<3) {
			cr.mat <- vector(length=length(CONCLIST),mode="numeric")
			cr.mat[] <- 0
			for(j in 1:length(CONCLIST)) {
				conc <- CONCLIST[j]
				cr.mat[j] <- (conc/(conc+y[i]))
			}
        	auc.ideal <- receptor.score(cr.mat,do.print=F)
			if(x[i]<auc.ideal*0.5 && x[i]>cutpoint && y[i]<1 && y[i]>0.1) doit <- T
		}
		if(y[i]<0.01 && x[i]<0.7 && x[i]>0.6) doit <- T
		if(doit) {
			counter <- counter+1		
			ilabel <- c(ilabel,paste("[",counter,"]",sep=""))
			clabel <- cnames[i]
			if(clabel=="2,2-Bis(4-hydroxyphenyl)-1,1,1-trichloroethane") clabel <- "HPTE"
			text(y[i],x[i],ilabel[length(ilabel)],pos=3,cex=0.8)
			points(y[i],x[i],pch=21,bg="cyan",cex=1.5)
			text(0.7e-3,0.3-(counter-counter0)*0.04,paste(ilabel[length(ilabel)],clabel),pos=4,cex=0.8)
			
		}
	}


    ac50s <- CONCLIST
	nac50 <- length(ac50s)
	aucs <- ac50s
	aucs[] <- 0
    for(i in 1:nac50) {
        ac50 <- ac50s[i]
		top <- 1.
		w <- 1
		cr.mat <- vector(length=length(CONCLIST),mode="numeric")
		cr.mat[] <- 0
		for(j in 1:length(CONCLIST)) {
			conc <- CONCLIST[j]
			cr.mat[j] <- top*(conc**w/(conc**w+ac50**w))
		}
        aucs[i] <- receptor.score(cr.mat,do.print=F)
        #cat("ac50,auc,",ac50s[i],":",format(aucs[i],digits=2),"\n")
    }
	lines(aucs~ac50s,lwd=3)
	x0 <- 0.55
	x1 <- 0.15
	arrows(x0=x0,y0=0.5,x1=x1,y1=0.5,col="red",lwd=3,length=0.1)
	xshift <- x0-x1
	text(0.6,0.5,paste("shift=",format(xshift,digits=2),"log units"),pos=4)
    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# build the summary table
#
#--------------------------------------------------------------------------------------
summaryTable <- function(to.file=F) {
	cat("summaryTable\n")
	flush.console()

    col.auc.1 <- which.max(is.element(names(SUPERMATRIX),"AUC.Agonist"))
    col.auc.2 <- which.max(is.element(names(SUPERMATRIX),"AUC.A18"))

	row.list <- names(SUPERMATRIX)[col.auc.1:col.auc.2]
	col.list <- c("Receptor",">0 to 0.1","0.1 to 0.2","0.2 to 0.5","0.5 to 1","ZT: >0 to 0.1","ZT: 0.1 to 0.2","ZT: 0.2 to 0.5","ZT: 0.5 to 1","Ratio: >0 to 0.1","Ratio: 0.1 to 0.2","Ratio: 0.2 to 0.5","Ratio: 0.5 to 1")
	result <- as.data.frame(matrix(nrow=length(row.list),ncol=length(col.list)))
	names(result) <- col.list
	result[,1] <- row.list
	for(i in 2:13) {
		result[,i] <- 0
	}
	
	sfilter <- SUPERMATRIX
	temp <- sfilter[,col.auc.1:col.auc.2]
	rs <- rowSums(temp)
	rs[rs>0] <- 1
	cat("Total number of chemicals: ",length(rs)," Those with at least one non-zero AUC: ",sum(rs),"\n")

	for(j in 1:length(row.list)) {
		row.name <- row.list[j]
		temp <- sfilter[,row.name]
		mask <- temp
		
		mask[] <- 1
		mask[temp==0] <- 0
		mask[temp>=0.1] <- 0
		result[j,2] <- sum(mask)

		mask[] <- 1
		mask[temp<0.1] <- 0
		mask[temp>0.2] <- 0
		result[j,3] <- sum(mask)

		mask[] <- 1
		mask[temp<=0.2] <- 0
		mask[temp>0.5] <- 0
		result[j,4] <- sum(mask)

		mask[] <- 1
		mask[temp<=0.5] <- 0
		result[j,5] <- sum(mask)
	}
	
	sfilter <- sfilter[sfilter[,"median.Z"]>=3,]
	sfilter <- sfilter[sfilter[,"median.T"]>=50,]
	temp <- sfilter[,col.auc.1:col.auc.2]
	rs <- rowSums(temp)
	rs[rs>0] <- 1
	cat("Total number of chemicals: ",length(rs)," Those with at least one non-zero AUC: ",sum(rs),"\n")

	for(j in 1:length(row.list)) {
		row.name <- row.list[j]
		temp <- sfilter[,row.name]
		mask <- temp
		
		mask[] <- 1
		mask[temp==0] <- 0
		mask[temp>=0.1] <- 0
		result[j,6] <- sum(mask)

		mask[] <- 1
		mask[temp<0.1] <- 0
		mask[temp>0.2] <- 0
		result[j,7] <- sum(mask)

		mask[] <- 1
		mask[temp<=0.2] <- 0
		mask[temp>0.5] <- 0
		result[j,8] <- sum(mask)

		mask[] <- 1
		mask[temp<=0.5] <- 0
		result[j,9] <- sum(mask)
	}

	for(j in 1:length(row.list)) {
		den1 <- result[j,2]
		den2 <- result[j,3]
		den3 <- result[j,4]
		den4 <- result[j,5]
		num1 <- result[j,6]
		num2 <- result[j,7]
		num3 <- result[j,8]
		num4 <- result[j,9]
		r1 <- num1 / den1
		r2 <- num2 / den2
		r3 <- num3 / den3
		r4 <- num4 / den4
		
		result[j,10] <- as.numeric(format(r1,digits=2))
		result[j,11] <- as.numeric(format(r2,digits=2))	
		result[j,12] <- as.numeric(format(r3,digits=2))	
		result[j,13] <- as.numeric(format(r4,digits=2))
	}
	outfile <- "../output/summaryTable.csv"
	write.csv(result,file=outfile, row.names=F)
	mask <- result[,3]+result[,4]+result[,5]
	mask[mask<3] <- 0
	mask[mask>0] <- 1
	for(i in 14:length(mask)) mask[i] <- 0
	mask[2] <- 0
	mask[3] <- 0
	mask[5] <- 0
	mask[6] <- 0
	
	name.list <- result[,1]
	for(i in 1:length(name.list)) name.list[i] <- substr(name.list[i],5,str_length(name.list[i]))
	sdata <- as.matrix(result[,10:13])
	sdata[sdata==0] <- 0.0
	sdata[is.nan(sdata)] <- -0.01
	
	sdata <- sdata[mask==1,]
	name.list <- name.list[mask==1]
	#stars(t(sdata),scale=F,frame.plot=T,len=1,draw.segments=T,full=F,axes=T,key.loc=c(3.5,3.2),key.labels=name.list)
	#browser()
    if(to.file) {
        fname <- paste("../plots/ZTfilter_fraction.pdf",sep="")
        pdf(file=fname,width=7,height=6,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(1,1),mar=c(4,4,3,2))
	barplot(t(sdata),ylab="Fraction of Chemicals Remaining After- Z-T Filter",
		beside=T,names.arg=name.list,horiz=F,col=c("white","lightgray","gray","black"),
		cex.names=0.8,cex.lab=1,cex.axis=1,
		legend.text=c("<0.1","0.1-0.2","0.2-0.5",">0.5"),args.legend=list(x=22,y=1))
	if(!to.file) browser()
	else dev.off()
}	

###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
